1 //
   2 // Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Common Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
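     //
     // As a concrete reading of this format, the first definition below,
     //   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
     // declares a slot that is Save-On-Call both as its register save type and
     // under the C calling convention, is spilled and restored as a float
     // (Op_RegF), uses 0 as its opcode encoding, and is backed by the VMReg
     // returned by xmm0->as_VMReg().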
  61 
  62 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  63 // Word a in each register holds a Float, words ab hold a Double.
  64 // The whole registers are used in SSE4.2 version intrinsics,
  65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  66 // UseXMMForArrayCopy and UseSuperWord flags).
  67 // For pre EVEX enabled architectures:
  68 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  69 // For EVEX enabled architectures:
  70 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  71 //
  72 // Linux ABI:   No register preserved across function calls
  73 //              XMM0-XMM7 might hold parameters
  74 // Windows ABI: XMM6-XMM31 preserved across function calls
  75 //              XMM0-XMM3 might hold parameters
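     //
     // Naming scheme for the entries below (a sketch of how to read them):
     // for a register XMMn, slot XMMn is word (a), backed by xmmn->as_VMReg();
     // slot XMMnb is word (b), backed by ->next(1); and so on through slot
     // XMMnp, word (p), backed by ->next(15).  The 16 single-word slots
     // together cover the full 512-bit register.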
  76 
  77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  93 
  94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
 110 
 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
 127 
 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
 144 
 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
 161 
 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
 178 
 179 #ifdef _WIN64
 180 
 181 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
 182 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 183 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 184 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 185 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 186 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 187 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 188 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 189 reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 190 reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 191 reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 192 reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 193 reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 194 reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 195 reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 196 reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 197 
 198 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
 199 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 200 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 201 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 202 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 203 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 204 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 205 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 206 reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 207 reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 208 reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 209 reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 210 reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 211 reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 212 reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 213 reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 214 
 215 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
 216 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 217 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 218 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 219 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 220 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 221 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 222 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 223 reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 224 reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 225 reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 226 reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 227 reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 228 reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 229 reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 230 reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 231 
 232 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
 233 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 234 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 235 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 236 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 237 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 238 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 239 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 240 reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 241 reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 242 reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 243 reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 244 reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 245 reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 246 reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 247 reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 248 
 249 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 250 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 251 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 252 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 253 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 254 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 255 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 256 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 257 reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 258 reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 259 reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 260 reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 261 reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 262 reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 263 reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 264 reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 265 
 266 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 267 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 268 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 269 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 270 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 271 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 272 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 273 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 274 reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 275 reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 276 reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 277 reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 278 reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 279 reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 280 reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 281 reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 282 
 283 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 284 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 285 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 286 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 287 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 288 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 289 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 290 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 291 reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 292 reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 293 reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 294 reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 295 reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 296 reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 297 reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 298 reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 299 
 300 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 301 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 302 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 303 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 304 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 305 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 306 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 307 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 308 reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 309 reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 310 reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 311 reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 312 reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 313 reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 314 reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 315 reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 316 
 317 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 318 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 319 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 320 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 321 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 322 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 323 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 324 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 325 reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 326 reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 327 reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 328 reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 329 reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 330 reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 331 reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 332 reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 333 
 334 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 335 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 336 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 337 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 338 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 339 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 340 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 341 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 342 reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 343 reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 344 reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 345 reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 346 reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 347 reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 348 reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 349 reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 350 
 351 reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
 352 reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 353 reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 354 reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 355 reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 356 reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 357 reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 358 reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 359 reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 360 reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 361 reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 362 reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 363 reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 364 reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 365 reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 366 reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 367 
 368 reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
 369 reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 370 reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 371 reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 372 reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 373 reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 374 reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 375 reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 376 reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 377 reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 378 reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 379 reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 380 reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 381 reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 382 reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 383 reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 384 
 385 reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
 386 reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 387 reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 388 reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 389 reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 390 reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 391 reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 392 reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 393 reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 394 reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 395 reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 396 reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 397 reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 398 reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 399 reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 400 reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 401 
 402 reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
 403 reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 404 reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 405 reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 406 reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 407 reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 408 reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 409 reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 410 reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 411 reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 412 reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 413 reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 414 reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 415 reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 416 reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 417 reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 418 
 419 reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
 420 reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 421 reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 422 reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 423 reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 424 reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 425 reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 426 reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 427 reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 428 reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 429 reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 430 reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 431 reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 432 reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 433 reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 434 reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 435 
 436 reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
 437 reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 438 reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 439 reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 440 reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 441 reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 442 reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 443 reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 444 reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 445 reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 446 reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 447 reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 448 reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 449 reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 450 reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 451 reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 452 
 453 reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
 454 reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 455 reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 456 reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 457 reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 458 reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 459 reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 460 reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 461 reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 462 reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 463 reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 464 reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 465 reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 466 reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 467 reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 468 reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 469 
 470 reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
 471 reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 472 reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 473 reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 474 reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 475 reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 476 reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 477 reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 478 reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 479 reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 480 reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 481 reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 482 reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 483 reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 484 reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 485 reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 486 
 487 reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
 488 reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 489 reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 490 reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 491 reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 492 reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 493 reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 494 reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 495 reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 496 reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 497 reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 498 reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 499 reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 500 reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 501 reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 502 reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 503 
 504 reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
 505 reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 506 reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 507 reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 508 reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 509 reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 510 reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 511 reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 512 reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 513 reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 514 reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 515 reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 516 reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 517 reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 518 reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 519 reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 520 
 521 reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
 522 reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 523 reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 524 reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 525 reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 526 reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 527 reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 528 reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 529 reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 530 reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 531 reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 532 reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 533 reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 534 reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 535 reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 536 reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 537 
 538 reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
     reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 539 reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 540 reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 541 reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 542 reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 543 reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 544 reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 545 reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 546 reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 547 reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 548 reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 549 reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 550 reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 551 reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 552 reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 553 
 554 reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
 555 reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
 556 reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
 557 reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
 558 reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
 559 reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
 560 reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
 561 reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
 562 reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
 563 reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
 564 reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
 565 reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
 566 reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
 567 reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
 568 reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
 569 reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));
 570 
 571 reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
 572 reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
 573 reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
 574 reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
 575 reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
 576 reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
 577 reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
 578 reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
 579 reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
 580 reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
 581 reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
 582 reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
 583 reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
 584 reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
 585 reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
 586 reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));
 587 
 588 reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
 589 reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
 590 reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
 591 reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
 592 reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
 593 reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
 594 reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
 595 reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
 596 reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
 597 reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
 598 reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
 599 reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
 600 reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
 601 reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
 602 reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
 603 reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
 604 
 605 reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
 606 reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
 607 reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
 608 reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
 609 reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
 610 reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
 611 reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
 612 reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
 613 reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
 614 reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
 615 reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
 616 reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
 617 reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
 618 reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
 619 reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
 620 reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));
 621 
 622 #else // _WIN64
 623 
 624 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
 625 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 626 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 627 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 628 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 629 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 630 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 631 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 632 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 633 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 634 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 635 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 636 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 637 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 638 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 639 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 640 
 641 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
 642 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 643 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 644 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 645 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 646 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 647 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 648 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 649 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 650 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 651 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 652 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 653 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 654 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 655 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 656 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 657 
 658 #ifdef _LP64
 659 
 660 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
 661 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 662 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 663 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 664 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 665 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 666 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 667 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 668 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 669 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 670 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 671 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 672 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 673 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 674 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 675 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 676 
 677 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
 678 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 679 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 680 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 681 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 682 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 683 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 684 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 685 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 686 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 687 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 688 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 689 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 690 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 691 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 692 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 693 
 694 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 695 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 696 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 697 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 698 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 699 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 700 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 701 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 702 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 703 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 704 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 705 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 706 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 707 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 708 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 709 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 710 
 711 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 712 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 713 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 714 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 715 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 716 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 717 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 718 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 719 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 720 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 721 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 722 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 723 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 724 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 725 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 726 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 727 
 728 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 729 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 730 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 731 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 732 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 733 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 734 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 735 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 736 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 737 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 738 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 739 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 740 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 741 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 742 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 743 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 744 
 745 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 746 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 747 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 748 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 749 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 750 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 751 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 752 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 753 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 754 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 755 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 756 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 757 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 758 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 759 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 760 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 761 
 762 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 763 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 764 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 765 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 766 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 767 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 768 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 769 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 770 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 771 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 772 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 773 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 774 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 775 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 776 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 777 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 778 
 779 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 780 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 781 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 782 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 783 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 784 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 785 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 786 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 787 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 788 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 789 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 790 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 791 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 792 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 793 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 794 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 795 
 796 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
 797 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 798 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 799 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 800 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 801 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 802 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 803 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 804 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 805 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 806 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 807 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 808 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 809 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 810 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 811 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 812 
 813 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
 814 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 815 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 816 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 817 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 818 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 819 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 820 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 821 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 822 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 823 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 824 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 825 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 826 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 827 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 828 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 829 
 830 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
 831 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 832 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 833 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 834 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 835 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 836 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 837 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 838 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 839 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 840 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 841 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 842 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 843 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 844 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 845 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 846 
 847 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
 848 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 849 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 850 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 851 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 852 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 853 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 854 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 855 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 856 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 857 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 858 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 859 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 860 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 861 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 862 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 863 
 864 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
 865 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 866 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 867 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 868 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 869 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 870 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 871 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 872 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 873 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 874 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 875 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 876 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 877 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 878 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 879 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 880 
 881 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
 882 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 883 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 884 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 885 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 886 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 887 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 888 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 889 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 890 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 891 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 892 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 893 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 894 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 895 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 896 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 897 
 898 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
 899 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 900 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 901 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 902 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 903 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 904 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 905 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 906 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 907 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 908 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 909 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 910 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 911 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 912 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 913 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 914 
 915 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
 916 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 917 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 918 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 919 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 920 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 921 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 922 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 923 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 931 
 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 948 
 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 965 
 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 982 
 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 999 
1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
1016 
1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
1027 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
1033 
1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
1050 
1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
1067 
1068 #endif // _LP64
1069 
1070 #endif // _WIN64
1071 
1072 #ifdef _LP64
1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
1074 #else
1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
1076 #endif // _LP64
1077 
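// Allocation classes.  The XMM registers, together with all of their 32-bit
// slices defined above, are grouped into a single chunk for the register
// allocator.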
1078 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1079                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1080                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1081                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1082                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1083                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1084                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1085                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1086 #ifdef _LP64
1087                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1088                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1089                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1090                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1091                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1092                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1093                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1094                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1095                   ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1096                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1097                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1098                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1099                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1100                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1101                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1102                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1103                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1104                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1105                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1106                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1107                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1108                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1109                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1110                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1111 #endif
1112                       );
1113 
1114 // flags allocation class should be last.
1115 alloc_class chunk2(RFLAGS);
1116 
1117 // Singleton class for condition codes
1118 reg_class int_flags(RFLAGS);
1119 
1120 // Class for pre evex float registers
1121 reg_class float_reg_legacy(XMM0,
1122                     XMM1,
1123                     XMM2,
1124                     XMM3,
1125                     XMM4,
1126                     XMM5,
1127                     XMM6,
1128                     XMM7
1129 #ifdef _LP64
1130                    ,XMM8,
1131                     XMM9,
1132                     XMM10,
1133                     XMM11,
1134                     XMM12,
1135                     XMM13,
1136                     XMM14,
1137                     XMM15
1138 #endif
1139                     );
1140 
1141 // Class for evex float registers
1142 reg_class float_reg_evex(XMM0,
1143                     XMM1,
1144                     XMM2,
1145                     XMM3,
1146                     XMM4,
1147                     XMM5,
1148                     XMM6,
1149                     XMM7
1150 #ifdef _LP64
1151                    ,XMM8,
1152                     XMM9,
1153                     XMM10,
1154                     XMM11,
1155                     XMM12,
1156                     XMM13,
1157                     XMM14,
1158                     XMM15,
1159                     XMM16,
1160                     XMM17,
1161                     XMM18,
1162                     XMM19,
1163                     XMM20,
1164                     XMM21,
1165                     XMM22,
1166                     XMM23,
1167                     XMM24,
1168                     XMM25,
1169                     XMM26,
1170                     XMM27,
1171                     XMM28,
1172                     XMM29,
1173                     XMM30,
1174                     XMM31
1175 #endif
1176                     );
1177 
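// reg_class_dynamic selects between the two classes above at runtime: the EVEX
// variant is used when AVX-512 (EVEX) is supported, otherwise the legacy one.
// The same pattern is repeated for the double and vector classes below.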
1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1179 
1180 // Class for pre evex double registers
1181 reg_class double_reg_legacy(XMM0,  XMM0b,
1182                      XMM1,  XMM1b,
1183                      XMM2,  XMM2b,
1184                      XMM3,  XMM3b,
1185                      XMM4,  XMM4b,
1186                      XMM5,  XMM5b,
1187                      XMM6,  XMM6b,
1188                      XMM7,  XMM7b
1189 #ifdef _LP64
1190                     ,XMM8,  XMM8b,
1191                      XMM9,  XMM9b,
1192                      XMM10, XMM10b,
1193                      XMM11, XMM11b,
1194                      XMM12, XMM12b,
1195                      XMM13, XMM13b,
1196                      XMM14, XMM14b,
1197                      XMM15, XMM15b
1198 #endif
1199                      );
1200 
1201 // Class for evex double registers
1202 reg_class double_reg_evex(XMM0,  XMM0b,
1203                      XMM1,  XMM1b,
1204                      XMM2,  XMM2b,
1205                      XMM3,  XMM3b,
1206                      XMM4,  XMM4b,
1207                      XMM5,  XMM5b,
1208                      XMM6,  XMM6b,
1209                      XMM7,  XMM7b
1210 #ifdef _LP64
1211                     ,XMM8,  XMM8b,
1212                      XMM9,  XMM9b,
1213                      XMM10, XMM10b,
1214                      XMM11, XMM11b,
1215                      XMM12, XMM12b,
1216                      XMM13, XMM13b,
1217                      XMM14, XMM14b,
1218                      XMM15, XMM15b,
1219                      XMM16, XMM16b,
1220                      XMM17, XMM17b,
1221                      XMM18, XMM18b,
1222                      XMM19, XMM19b,
1223                      XMM20, XMM20b,
1224                      XMM21, XMM21b,
1225                      XMM22, XMM22b,
1226                      XMM23, XMM23b,
1227                      XMM24, XMM24b,
1228                      XMM25, XMM25b,
1229                      XMM26, XMM26b,
1230                      XMM27, XMM27b,
1231                      XMM28, XMM28b,
1232                      XMM29, XMM29b,
1233                      XMM30, XMM30b,
1234                      XMM31, XMM31b
1235 #endif
1236                      );
1237 
1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1239 
1240 // Class for pre evex 32bit vector registers
1241 reg_class vectors_reg_legacy(XMM0,
1242                       XMM1,
1243                       XMM2,
1244                       XMM3,
1245                       XMM4,
1246                       XMM5,
1247                       XMM6,
1248                       XMM7
1249 #ifdef _LP64
1250                      ,XMM8,
1251                       XMM9,
1252                       XMM10,
1253                       XMM11,
1254                       XMM12,
1255                       XMM13,
1256                       XMM14,
1257                       XMM15
1258 #endif
1259                       );
1260 
1261 // Class for evex 32bit vector registers
1262 reg_class vectors_reg_evex(XMM0,
1263                       XMM1,
1264                       XMM2,
1265                       XMM3,
1266                       XMM4,
1267                       XMM5,
1268                       XMM6,
1269                       XMM7
1270 #ifdef _LP64
1271                      ,XMM8,
1272                       XMM9,
1273                       XMM10,
1274                       XMM11,
1275                       XMM12,
1276                       XMM13,
1277                       XMM14,
1278                       XMM15,
1279                       XMM16,
1280                       XMM17,
1281                       XMM18,
1282                       XMM19,
1283                       XMM20,
1284                       XMM21,
1285                       XMM22,
1286                       XMM23,
1287                       XMM24,
1288                       XMM25,
1289                       XMM26,
1290                       XMM27,
1291                       XMM28,
1292                       XMM29,
1293                       XMM30,
1294                       XMM31
1295 #endif
1296                       );
1297 
1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1299 
// Class for pre evex 64bit vector registers
1301 reg_class vectord_reg_legacy(XMM0,  XMM0b,
1302                       XMM1,  XMM1b,
1303                       XMM2,  XMM2b,
1304                       XMM3,  XMM3b,
1305                       XMM4,  XMM4b,
1306                       XMM5,  XMM5b,
1307                       XMM6,  XMM6b,
1308                       XMM7,  XMM7b
1309 #ifdef _LP64
1310                      ,XMM8,  XMM8b,
1311                       XMM9,  XMM9b,
1312                       XMM10, XMM10b,
1313                       XMM11, XMM11b,
1314                       XMM12, XMM12b,
1315                       XMM13, XMM13b,
1316                       XMM14, XMM14b,
1317                       XMM15, XMM15b
1318 #endif
1319                       );
1320 
// Class for evex 64bit vector registers
1322 reg_class vectord_reg_evex(XMM0,  XMM0b,
1323                       XMM1,  XMM1b,
1324                       XMM2,  XMM2b,
1325                       XMM3,  XMM3b,
1326                       XMM4,  XMM4b,
1327                       XMM5,  XMM5b,
1328                       XMM6,  XMM6b,
1329                       XMM7,  XMM7b
1330 #ifdef _LP64
1331                      ,XMM8,  XMM8b,
1332                       XMM9,  XMM9b,
1333                       XMM10, XMM10b,
1334                       XMM11, XMM11b,
1335                       XMM12, XMM12b,
1336                       XMM13, XMM13b,
1337                       XMM14, XMM14b,
1338                       XMM15, XMM15b,
1339                       XMM16, XMM16b,
1340                       XMM17, XMM17b,
1341                       XMM18, XMM18b,
1342                       XMM19, XMM19b,
1343                       XMM20, XMM20b,
1344                       XMM21, XMM21b,
1345                       XMM22, XMM22b,
1346                       XMM23, XMM23b,
1347                       XMM24, XMM24b,
1348                       XMM25, XMM25b,
1349                       XMM26, XMM26b,
1350                       XMM27, XMM27b,
1351                       XMM28, XMM28b,
1352                       XMM29, XMM29b,
1353                       XMM30, XMM30b,
1354                       XMM31, XMM31b
1355 #endif
1356                       );
1357 
1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1359 
// Class for pre evex 128bit vector registers
1361 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
1362                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1363                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1364                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1365                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1366                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1367                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1368                       XMM7,  XMM7b,  XMM7c,  XMM7d
1369 #ifdef _LP64
1370                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1371                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1372                       XMM10, XMM10b, XMM10c, XMM10d,
1373                       XMM11, XMM11b, XMM11c, XMM11d,
1374                       XMM12, XMM12b, XMM12c, XMM12d,
1375                       XMM13, XMM13b, XMM13c, XMM13d,
1376                       XMM14, XMM14b, XMM14c, XMM14d,
1377                       XMM15, XMM15b, XMM15c, XMM15d
1378 #endif
1379                       );
1380 
// Class for evex 128bit vector registers
1382 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
1383                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1384                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1385                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1386                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1387                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1388                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1389                       XMM7,  XMM7b,  XMM7c,  XMM7d
1390 #ifdef _LP64
1391                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1392                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1393                       XMM10, XMM10b, XMM10c, XMM10d,
1394                       XMM11, XMM11b, XMM11c, XMM11d,
1395                       XMM12, XMM12b, XMM12c, XMM12d,
1396                       XMM13, XMM13b, XMM13c, XMM13d,
1397                       XMM14, XMM14b, XMM14c, XMM14d,
1398                       XMM15, XMM15b, XMM15c, XMM15d,
1399                       XMM16, XMM16b, XMM16c, XMM16d,
1400                       XMM17, XMM17b, XMM17c, XMM17d,
1401                       XMM18, XMM18b, XMM18c, XMM18d,
1402                       XMM19, XMM19b, XMM19c, XMM19d,
1403                       XMM20, XMM20b, XMM20c, XMM20d,
1404                       XMM21, XMM21b, XMM21c, XMM21d,
1405                       XMM22, XMM22b, XMM22c, XMM22d,
1406                       XMM23, XMM23b, XMM23c, XMM23d,
1407                       XMM24, XMM24b, XMM24c, XMM24d,
1408                       XMM25, XMM25b, XMM25c, XMM25d,
1409                       XMM26, XMM26b, XMM26c, XMM26d,
1410                       XMM27, XMM27b, XMM27c, XMM27d,
1411                       XMM28, XMM28b, XMM28c, XMM28d,
1412                       XMM29, XMM29b, XMM29c, XMM29d,
1413                       XMM30, XMM30b, XMM30c, XMM30d,
1414                       XMM31, XMM31b, XMM31c, XMM31d
1415 #endif
1416                       );
1417 
1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1419 
// Class for pre evex 256bit vector registers
1421 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1422                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1423                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1424                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1425                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1426                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1427                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1428                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1429 #ifdef _LP64
1430                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1431                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1432                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1433                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1434                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1435                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1436                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1437                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
1438 #endif
1439                       );
1440 
// Class for evex 256bit vector registers
1442 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1443                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1444                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1445                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1446                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1447                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1448                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1449                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1450 #ifdef _LP64
1451                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1452                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1453                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1454                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1455                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1456                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1457                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1458                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1459                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1460                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1461                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1462                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1463                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1464                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1465                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1466                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1467                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1468                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1469                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1470                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1471                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1472                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1473                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1474                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
1475 #endif
1476                       );
1477 
1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1479 
// Class for all 512bit vector registers. 512bit vectors are available only
// with EVEX, so unlike the classes above there is no separate legacy variant.
1481 reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1482                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1483                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1484                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1485                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1486                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1487                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1488                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1489 #ifdef _LP64
1490                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1491                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1492                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1493                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1494                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1495                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1496                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1497                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1498                      ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1499                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1500                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1501                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1502                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1503                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1504                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1505                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1506                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1507                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1508                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1509                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1510                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1511                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1512                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1513                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1514 #endif
1515                       );
1516 
1517 %}
1518 
1519 
1520 //----------SOURCE BLOCK-------------------------------------------------------
1521 // This is a block of C++ code which provides values, functions, and
1522 // definitions necessary in the rest of the architecture description
1523 
1524 source_hpp %{
1525 // Header information of the source block.
1526 // Method declarations/definitions which are used outside
1527 // the ad-scope can conveniently be defined here.
1528 //
1529 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
1531 
1532 class NativeJump;
1533 
1534 class CallStubImpl {
1535 
1536   //--------------------------------------------------------------
1537   //---<  Used for optimization in Compile::shorten_branches  >---
1538   //--------------------------------------------------------------
1539 
1540  public:
1541   // Size of call trampoline stub.
1542   static uint size_call_trampoline() {
1543     return 0; // no call trampolines on this platform
1544   }
1545 
1546   // number of relocations needed by a call trampoline stub
1547   static uint reloc_call_trampoline() {
1548     return 0; // no call trampolines on this platform
1549   }
1550 };
1551 
1552 class HandlerImpl {
1553 
1554  public:
1555 
1556   static int emit_exception_handler(CodeBuffer &cbuf);
1557   static int emit_deopt_handler(CodeBuffer& cbuf);
1558 
1559   static uint size_exception_handler() {
1560     // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1563     // Note that this value is also credited (in output.cpp) to
1564     // the size of the code section.
1565     return NativeJump::instruction_size;
1566   }
1567 
1568 #ifdef _LP64
1569   static uint size_deopt_handler() {
1570     // three 5 byte instructions
1571     return 15;
1572   }
1573 #else
1574   static uint size_deopt_handler() {
1575     // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1578     // Note that this value is also credited (in output.cpp) to
1579     // the size of the code section.
1580     return 5 + NativeJump::instruction_size; // pushl(); jmp;
1581   }
1582 #endif
1583 };
1584 
1585 %} // end source_hpp
1586 
1587 source %{
1588 
1589 // Emit exception handler code.
// The handler simply jumps to the runtime's pre-generated exception blob.
1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1592 
1593   // Note that the code buffer's insts_mark is always relative to insts.
1594   // That's why we must use the macroassembler to generate a handler.
1595   MacroAssembler _masm(&cbuf);
1596   address base = __ start_a_stub(size_exception_handler());
1597   if (base == NULL) {
1598     ciEnv::current()->record_failure("CodeCache is full");
1599     return 0;  // CodeBuffer::expand failed
1600   }
1601   int offset = __ offset();
1602   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1603   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1604   __ end_a_stub();
1605   return offset;
1606 }
1607 
1608 // Emit deopt handler code.
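// The handler pushes the current code address on the stack (all registers may
// be live) and then jumps to the deopt blob's unpack entry; the emitted code
// must fit within size_deopt_handler() bytes.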
1609 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1610 
1611   // Note that the code buffer's insts_mark is always relative to insts.
1612   // That's why we must use the macroassembler to generate a handler.
1613   MacroAssembler _masm(&cbuf);
1614   address base = __ start_a_stub(size_deopt_handler());
1615   if (base == NULL) {
1616     ciEnv::current()->record_failure("CodeCache is full");
1617     return 0;  // CodeBuffer::expand failed
1618   }
1619   int offset = __ offset();
1620 
1621 #ifdef _LP64
1622   address the_pc = (address) __ pc();
1623   Label next;
1624   // push a "the_pc" on the stack without destroying any registers
1625   // as they all may be live.
1626 
1627   // push address of "next"
1628   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1629   __ bind(next);
1630   // adjust it so it matches "the_pc"
1631   __ subptr(Address(rsp, 0), __ offset() - offset);
1632 #else
1633   InternalAddress here(__ pc());
1634   __ pushptr(here.addr());
1635 #endif
1636 
1637   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1638   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1639   __ end_a_stub();
1640   return offset;
1641 }
1642 
1643 
1644 //=============================================================================
1645 
1646   // Float masks come from different places depending on platform.
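  // (They back, for example, the abs/neg float and double instructions defined
  // later in this file.)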
1647 #ifdef _LP64
1648   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
1649   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
1650   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1651   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1652 #else
1653   static address float_signmask()  { return (address)float_signmask_pool; }
1654   static address float_signflip()  { return (address)float_signflip_pool; }
1655   static address double_signmask() { return (address)double_signmask_pool; }
1656   static address double_signflip() { return (address)double_signflip_pool; }
1657 #endif
1658 
1659 
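// Returns whether the given opcode has a usable match rule on the current CPU.
// Example: without AVX-512DQ support, Op_MulVL is rejected here, so vector
// multiplies of long elements are not generated.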
1660 const bool Matcher::match_rule_supported(int opcode) {
1661   if (!has_match_rule(opcode))
1662     return false;
1663 
1664   bool ret_value = true;
1665   switch (opcode) {
1666     case Op_PopCountI:
1667     case Op_PopCountL:
1668       if (!UsePopCountInstruction)
1669         ret_value = false;
1670       break;
1671     case Op_MulVI:
1672       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1673         ret_value = false;
1674       break;
1675     case Op_MulVL:
1676     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq())
1678         ret_value = false;
1679       break;
1680     case Op_AddReductionVL:
1681       if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
1682         ret_value = false;
1683       break;
1684     case Op_AddReductionVI:
1685       if (UseSSE < 3) // requires at least SSE3
1686         ret_value = false;
1687       break;
1688     case Op_MulReductionVI:
1689       if (UseSSE < 4) // requires at least SSE4
1690         ret_value = false;
1691       break;
1692     case Op_AddReductionVF:
1693     case Op_AddReductionVD:
1694     case Op_MulReductionVF:
1695     case Op_MulReductionVD:
1696       if (UseSSE < 1) // requires at least SSE
1697         ret_value = false;
1698       break;
1699     case Op_SqrtVD:
1700       if (UseAVX < 1) // enabled for AVX only
1701         ret_value = false;
1702       break;
1703     case Op_CompareAndSwapL:
1704 #ifdef _LP64
1705     case Op_CompareAndSwapP:
1706 #endif
1707       if (!VM_Version::supports_cx8())
1708         ret_value = false;
1709       break;
1710     case Op_CMoveVD:
1711       if (UseAVX < 1 || UseAVX > 2)
1712         ret_value = false;
1713       break;
1714     case Op_StrIndexOf:
1715       if (!UseSSE42Intrinsics)
1716         ret_value = false;
1717       break;
1718     case Op_StrIndexOfChar:
1719       if (!(UseSSE > 4))
1720         ret_value = false;
1721       break;
1722   }
1723 
  return ret_value;  // By default, match rules are supported.
1725 }
1726 
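// Same check as above, with the vector length also taken into account.
// Example: Op_AddVB with vlen == 64 (a 512-bit byte vector) is only supported
// when AVX512BW is available.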
1727 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // Identify extra cases that we might want to provide match rules for,
  // e.g. Op_* vector nodes and other intrinsics, guarded by the vector length.
1730   bool ret_value = match_rule_supported(opcode);
1731   if (ret_value) {
1732     switch (opcode) {
1733       case Op_AddVB:
1734       case Op_SubVB:
        if ((vlen == 64) && !VM_Version::supports_avx512bw())
1736           ret_value = false;
1737         break;
1738       case Op_URShiftVS:
1739       case Op_RShiftVS:
1740       case Op_LShiftVS:
1741       case Op_MulVS:
1742       case Op_AddVS:
1743       case Op_SubVS:
        if ((vlen == 32) && !VM_Version::supports_avx512bw())
1745           ret_value = false;
1746         break;
1747       case Op_CMoveVD:
1748         if (vlen != 4)
1749           ret_value  = false;
1750         break;
1751     }
1752   }
1753 
  return ret_value;  // By default, match rules are supported.
1755 }
1756 
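// Scales the register pressure threshold used for float/vector registers.
// Example: on AVX-512 machines, which expose 32 XMM registers, the default
// threshold is doubled.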
1757 const int Matcher::float_pressure(int default_pressure_threshold) {
1758   int float_pressure_threshold = default_pressure_threshold;
1759 #ifdef _LP64
1760   if (UseAVX > 2) {
1761     // Increase pressure threshold on machines with AVX3 which have
1762     // 2x more XMM registers.
1763     float_pressure_threshold = default_pressure_threshold * 2;
1764   }
1765 #endif
1766   return float_pressure_threshold;
1767 }
1768 
1769 // Max vector size in bytes. 0 if not supported.
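// Example: with UseAVX == 2 and MaxVectorSize >= 32, T_INT vectors are 32 bytes
// wide (8 ints); with plain SSE2 they are 16 bytes wide (4 ints).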
1770 const int Matcher::vector_width_in_bytes(BasicType bt) {
1771   assert(is_java_primitive(bt), "only primitive type vectors");
1772   if (UseSSE < 2) return 0;
1773   // SSE2 supports 128bit vectors for all types.
1774   // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
1776   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
1777   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1778   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1779     size = (UseAVX > 2) ? 64 : 32;
1780   // Use flag to limit vector size.
1781   size = MIN2(size,(int)MaxVectorSize);
1782   // Minimum 2 values in vector (or 4 for bytes).
1783   switch (bt) {
1784   case T_DOUBLE:
1785   case T_LONG:
1786     if (size < 16) return 0;
1787     break;
1788   case T_FLOAT:
1789   case T_INT:
1790     if (size < 8) return 0;
1791     break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
1804   default:
1805     ShouldNotReachHere();
1806   }
1807   return size;
1808 }
1809 
1810 // Limits on vector size (number of elements) loaded into vector.
1811 const int Matcher::max_vector_size(const BasicType bt) {
1812   return vector_width_in_bytes(bt)/type2aelembytes(bt);
1813 }
1814 const int Matcher::min_vector_size(const BasicType bt) {
1815   int max_size = max_vector_size(bt);
1816   // Min size which can be loaded into vector is 4 bytes.
1817   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
1818   return MIN2(size,max_size);
1819 }
1820 
// Vector ideal reg corresponding to specified size in bytes
1822 const int Matcher::vector_ideal_reg(int size) {
1823   assert(MaxVectorSize >= size, "");
1824   switch(size) {
1825     case  4: return Op_VecS;
1826     case  8: return Op_VecD;
1827     case 16: return Op_VecX;
1828     case 32: return Op_VecY;
1829     case 64: return Op_VecZ;
1830   }
1831   ShouldNotReachHere();
1832   return 0;
1833 }
1834 
1835 // Only lowest bits of xmm reg are used for vector shift count.
1836 const int Matcher::vector_shift_count_ideal_reg(int size) {
1837   return Op_VecS;
1838 }
1839 
// x86 supports misaligned vector store/load.
1841 const bool Matcher::misaligned_vectors_ok() {
1842   return !AlignVector; // can be changed by flag
1843 }
1844 
1845 // x86 AES instructions are compatible with SunJCE expanded
1846 // keys, hence we do not need to pass the original key to stubs
1847 const bool Matcher::pass_original_key_for_aes() {
1848   return false;
1849 }
1850 
1851 // Helper methods for MachSpillCopyNode::implementation().
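// vec_mov_helper emits (or, when only printing/sizing, describes) a
// register-to-register vector copy: movdqu for up to 128-bit vectors, vmovdqu
// for 256-bit and evmovdqul for 512-bit vectors.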
1852 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1853                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained by emitting the instructions into a scratch buffer instead.
1856   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1857   assert(ireg == Op_VecS || // 32bit vector
1858          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1859          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1860          "no non-adjacent vector moves" );
1861   if (cbuf) {
1862     MacroAssembler _masm(cbuf);
1863     int offset = __ offset();
1864     switch (ireg) {
1865     case Op_VecS: // copy whole register
1866     case Op_VecD:
1867     case Op_VecX:
1868       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1869       break;
1870     case Op_VecY:
1871       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1872       break;
1873     case Op_VecZ:
1874       __ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1875       break;
1876     default:
1877       ShouldNotReachHere();
1878     }
1879     int size = __ offset() - offset;
1880 #ifdef ASSERT
1881     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
1883 #endif
1884     return size;
1885 #ifndef PRODUCT
1886   } else if (!do_size) {
1887     switch (ireg) {
1888     case Op_VecS:
1889     case Op_VecD:
1890     case Op_VecX:
1891       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1892       break;
1893     case Op_VecY:
1894     case Op_VecZ:
1895       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1896       break;
1897     default:
1898       ShouldNotReachHere();
1899     }
1900 #endif
1901   }
1902   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
1903   return (UseAVX > 2) ? 6 : 4;
1904 }
1905 
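// vec_spill_helper emits (or describes) a vector load/store between an XMM
// register and a stack slot, selecting movdl/movq/movdqu/vmovdqu/evmovdqul
// according to the vector width.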
1906 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1907                             int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained by emitting the instructions into a scratch buffer instead.
1910   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1911   if (cbuf) {
1912     MacroAssembler _masm(cbuf);
1913     int offset = __ offset();
1914     if (is_load) {
1915       switch (ireg) {
1916       case Op_VecS:
1917         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1918         break;
1919       case Op_VecD:
1920         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1921         break;
1922       case Op_VecX:
1923         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1924         break;
1925       case Op_VecY:
1926         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1927         break;
1928       case Op_VecZ:
1929         __ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1930         break;
1931       default:
1932         ShouldNotReachHere();
1933       }
1934     } else { // store
1935       switch (ireg) {
1936       case Op_VecS:
1937         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1938         break;
1939       case Op_VecD:
1940         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1941         break;
1942       case Op_VecX:
1943         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1944         break;
1945       case Op_VecY:
1946         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1947         break;
1948       case Op_VecZ:
1949         __ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1950         break;
1951       default:
1952         ShouldNotReachHere();
1953       }
1954     }
1955     int size = __ offset() - offset;
1956 #ifdef ASSERT
1957     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1958     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1959     assert(!do_size || size == (5+offset_size), "incorrect size calculation");
1960 #endif
1961     return size;
1962 #ifndef PRODUCT
1963   } else if (!do_size) {
1964     if (is_load) {
1965       switch (ireg) {
1966       case Op_VecS:
1967         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1968         break;
1969       case Op_VecD:
1970         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1971         break;
1972       case Op_VecX:
1973         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1974         break;
1975       case Op_VecY:
1976       case Op_VecZ:
1977         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1978         break;
1979       default:
1980         ShouldNotReachHere();
1981       }
1982     } else { // store
1983       switch (ireg) {
1984       case Op_VecS:
1985         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1986         break;
1987       case Op_VecD:
1988         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1989         break;
1990       case Op_VecX:
1991         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1992         break;
1993       case Op_VecY:
1994       case Op_VecZ:
1995         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1996         break;
1997       default:
1998         ShouldNotReachHere();
1999       }
2000     }
2001 #endif
2002   }
2003   bool is_single_byte = false;
2004   int vec_len = 0;
2005   if ((UseAVX > 2) && (stack_offset != 0)) {
2006     int tuple_type = Assembler::EVEX_FVM;
2007     int input_size = Assembler::EVEX_32bit;
2008     switch (ireg) {
2009     case Op_VecS:
2010       tuple_type = Assembler::EVEX_T1S;
2011       break;
2012     case Op_VecD:
2013       tuple_type = Assembler::EVEX_T1S;
2014       input_size = Assembler::EVEX_64bit;
2015       break;
2016     case Op_VecX:
2017       break;
2018     case Op_VecY:
2019       vec_len = 1;
2020       break;
2021     case Op_VecZ:
2022       vec_len = 2;
2023       break;
2024     }
2025     is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
2026   }
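       // Rough size model (an inferred breakdown, not stated elsewhere in this
       // file): the base of 5 bytes covers the SIMD/VEX-prefixed opcode plus
       // ModRM and SIB, offset_size adds the displacement (0, 1 or 4 bytes),
       // and the 4-byte EVEX prefix costs 2 bytes more than the 2-byte VEX prefix.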
2027   int offset_size = 0;
2028   int size = 5;
2029   if (UseAVX > 2 ) {
2030     if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
2031       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
2032       size += 2; // Need an additional two bytes for EVEX encoding
2033     } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
2034       offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
2035     } else {
2036       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
2037       size += 2; // Need an additional two bytes for EVEX encoding
2038     }
2039   } else {
2040     offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
2041   }
2042   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
2043   return size+offset_size;
2044 }
2045 
2046 static inline jfloat replicate4_imm(int con, int width) {
2047   // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
2048   assert(width == 1 || width == 2, "only byte or short types here");
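       // Worked example (illustrative): con = 0x1F, width = 1 -> val is masked
       // to 0x1F, widened to 0x1F1F and then 0x1F1F1F1F, and the resulting
       // 32-bit pattern is returned reinterpreted as a jfloat.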
2049   int bit_width = width * 8;
2050   jint val = con;
2051   val &= (1 << bit_width) - 1;  // mask off sign bits
2052   while(bit_width < 32) {
2053     val |= (val << bit_width);
2054     bit_width <<= 1;
2055   }
2056   jfloat fval = *((jfloat*) &val);  // coerce to float type
2057   return fval;
2058 }
2059 
2060 static inline jdouble replicate8_imm(int con, int width) {
2061   // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
2062   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
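       // Worked example (illustrative): con = 0x1F, width = 2 -> val is masked
       // to 0x001F, widened to 0x001F001F and then 0x001F001F001F001F, and the
       // 64-bit pattern is returned reinterpreted as a jdouble.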
2063   int bit_width = width * 8;
2064   jlong val = con;
2065   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
2066   while(bit_width < 64) {
2067     val |= (val << bit_width);
2068     bit_width <<= 1;
2069   }
2070   jdouble dval = *((jdouble*) &val);  // coerce to double type
2071   return dval;
2072 }
2073 
2074 #ifndef PRODUCT
2075   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2076     st->print("nop \t# %d bytes pad for loops and calls", _count);
2077   }
2078 #endif
2079 
2080   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2081     MacroAssembler _masm(&cbuf);
2082     __ nop(_count);
2083   }
2084 
2085   uint MachNopNode::size(PhaseRegAlloc*) const {
2086     return _count;
2087   }
2088 
2089 #ifndef PRODUCT
2090   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
2091     st->print("# breakpoint");
2092   }
2093 #endif
2094 
2095   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
2096     MacroAssembler _masm(&cbuf);
2097     __ int3();
2098   }
2099 
2100   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
2101     return MachNode::size(ra_);
2102   }
2103 
2104 %}
2105 
2106 encode %{
2107 
2108   enc_class call_epilog %{
2109     if (VerifyStackAtCalls) {
2110       // Check that stack depth is unchanged: find majik cookie on stack
2111       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2112       MacroAssembler _masm(&cbuf);
2113       Label L;
2114       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2115       __ jccb(Assembler::equal, L);
2116       // Die if stack mismatch
2117       __ int3();
2118       __ bind(L);
2119     }
2120   %}
2121 
2122 %}
2123 
2124 
2125 //----------OPERANDS-----------------------------------------------------------
2126 // Operand definitions must precede instruction definitions for correct parsing
2127 // in the ADLC because operands constitute user defined types which are used in
2128 // instruction definitions.
2129 
2130 // This operand applies only to EVEX targets, so there is just one version.
2131 operand vecZ() %{
2132   constraint(ALLOC_IN_RC(vectorz_reg));
2133   match(VecZ);
2134 
2135   format %{ %}
2136   interface(REG_INTER);
2137 %}
2138 
2139 // Comparison Code for FP conditional move
2140 operand cmpOp_vcmppd() %{
2141   match(Bool);
2142 
2143   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
2144             n->as_Bool()->_test._test != BoolTest::no_overflow);
2145   format %{ "" %}
2146   interface(COND_INTER) %{
2147     equal        (0x0, "eq");
2148     less         (0x1, "lt");
2149     less_equal   (0x2, "le");
2150     not_equal    (0xC, "ne");
2151     greater_equal(0xD, "ge");
2152     greater      (0xE, "gt");
2153     // TODO: adlc cannot compile this operand without the next two lines; it fails with:
2154     // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
2155     // equal' for overflow.
2156     overflow     (0x20, "o");  // not really supported by the instruction
2157     no_overflow  (0x21, "no"); // not really supported by the instruction
2158   %}
2159 %}
2160 
2161 
2162 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2163 
2164 // ============================================================================
2165 
2166 instruct ShouldNotReachHere() %{
2167   match(Halt);
2168   format %{ "int3\t# ShouldNotReachHere" %}
2169   ins_encode %{
2170     __ int3();
2171   %}
2172   ins_pipe(pipe_slow);
2173 %}
2174 
2175 // ============================================================================
2176 
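     // Scalar float/double arithmetic. The SSE forms below (UseAVX == 0) are
     // two-operand and destructive (dst op= src); the AVX forms (UseAVX > 0)
     // are three-operand and leave both sources untouched.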
2177 instruct addF_reg(regF dst, regF src) %{
2178   predicate((UseSSE>=1) && (UseAVX == 0));
2179   match(Set dst (AddF dst src));
2180 
2181   format %{ "addss   $dst, $src" %}
2182   ins_cost(150);
2183   ins_encode %{
2184     __ addss($dst$$XMMRegister, $src$$XMMRegister);
2185   %}
2186   ins_pipe(pipe_slow);
2187 %}
2188 
2189 instruct addF_mem(regF dst, memory src) %{
2190   predicate((UseSSE>=1) && (UseAVX == 0));
2191   match(Set dst (AddF dst (LoadF src)));
2192 
2193   format %{ "addss   $dst, $src" %}
2194   ins_cost(150);
2195   ins_encode %{
2196     __ addss($dst$$XMMRegister, $src$$Address);
2197   %}
2198   ins_pipe(pipe_slow);
2199 %}
2200 
2201 instruct addF_imm(regF dst, immF con) %{
2202   predicate((UseSSE>=1) && (UseAVX == 0));
2203   match(Set dst (AddF dst con));
2204   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2205   ins_cost(150);
2206   ins_encode %{
2207     __ addss($dst$$XMMRegister, $constantaddress($con));
2208   %}
2209   ins_pipe(pipe_slow);
2210 %}
2211 
2212 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
2213   predicate(UseAVX > 0);
2214   match(Set dst (AddF src1 src2));
2215 
2216   format %{ "vaddss  $dst, $src1, $src2" %}
2217   ins_cost(150);
2218   ins_encode %{
2219     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2220   %}
2221   ins_pipe(pipe_slow);
2222 %}
2223 
2224 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
2225   predicate(UseAVX > 0);
2226   match(Set dst (AddF src1 (LoadF src2)));
2227 
2228   format %{ "vaddss  $dst, $src1, $src2" %}
2229   ins_cost(150);
2230   ins_encode %{
2231     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2232   %}
2233   ins_pipe(pipe_slow);
2234 %}
2235 
2236 instruct addF_reg_imm(regF dst, regF src, immF con) %{
2237   predicate(UseAVX > 0);
2238   match(Set dst (AddF src con));
2239 
2240   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2241   ins_cost(150);
2242   ins_encode %{
2243     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2244   %}
2245   ins_pipe(pipe_slow);
2246 %}
2247 
2248 instruct addD_reg(regD dst, regD src) %{
2249   predicate((UseSSE>=2) && (UseAVX == 0));
2250   match(Set dst (AddD dst src));
2251 
2252   format %{ "addsd   $dst, $src" %}
2253   ins_cost(150);
2254   ins_encode %{
2255     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
2256   %}
2257   ins_pipe(pipe_slow);
2258 %}
2259 
2260 instruct addD_mem(regD dst, memory src) %{
2261   predicate((UseSSE>=2) && (UseAVX == 0));
2262   match(Set dst (AddD dst (LoadD src)));
2263 
2264   format %{ "addsd   $dst, $src" %}
2265   ins_cost(150);
2266   ins_encode %{
2267     __ addsd($dst$$XMMRegister, $src$$Address);
2268   %}
2269   ins_pipe(pipe_slow);
2270 %}
2271 
2272 instruct addD_imm(regD dst, immD con) %{
2273   predicate((UseSSE>=2) && (UseAVX == 0));
2274   match(Set dst (AddD dst con));
2275   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2276   ins_cost(150);
2277   ins_encode %{
2278     __ addsd($dst$$XMMRegister, $constantaddress($con));
2279   %}
2280   ins_pipe(pipe_slow);
2281 %}
2282 
2283 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
2284   predicate(UseAVX > 0);
2285   match(Set dst (AddD src1 src2));
2286 
2287   format %{ "vaddsd  $dst, $src1, $src2" %}
2288   ins_cost(150);
2289   ins_encode %{
2290     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2291   %}
2292   ins_pipe(pipe_slow);
2293 %}
2294 
2295 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
2296   predicate(UseAVX > 0);
2297   match(Set dst (AddD src1 (LoadD src2)));
2298 
2299   format %{ "vaddsd  $dst, $src1, $src2" %}
2300   ins_cost(150);
2301   ins_encode %{
2302     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2303   %}
2304   ins_pipe(pipe_slow);
2305 %}
2306 
2307 instruct addD_reg_imm(regD dst, regD src, immD con) %{
2308   predicate(UseAVX > 0);
2309   match(Set dst (AddD src con));
2310 
2311   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2312   ins_cost(150);
2313   ins_encode %{
2314     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2315   %}
2316   ins_pipe(pipe_slow);
2317 %}
2318 
2319 instruct subF_reg(regF dst, regF src) %{
2320   predicate((UseSSE>=1) && (UseAVX == 0));
2321   match(Set dst (SubF dst src));
2322 
2323   format %{ "subss   $dst, $src" %}
2324   ins_cost(150);
2325   ins_encode %{
2326     __ subss($dst$$XMMRegister, $src$$XMMRegister);
2327   %}
2328   ins_pipe(pipe_slow);
2329 %}
2330 
2331 instruct subF_mem(regF dst, memory src) %{
2332   predicate((UseSSE>=1) && (UseAVX == 0));
2333   match(Set dst (SubF dst (LoadF src)));
2334 
2335   format %{ "subss   $dst, $src" %}
2336   ins_cost(150);
2337   ins_encode %{
2338     __ subss($dst$$XMMRegister, $src$$Address);
2339   %}
2340   ins_pipe(pipe_slow);
2341 %}
2342 
2343 instruct subF_imm(regF dst, immF con) %{
2344   predicate((UseSSE>=1) && (UseAVX == 0));
2345   match(Set dst (SubF dst con));
2346   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2347   ins_cost(150);
2348   ins_encode %{
2349     __ subss($dst$$XMMRegister, $constantaddress($con));
2350   %}
2351   ins_pipe(pipe_slow);
2352 %}
2353 
2354 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
2355   predicate(UseAVX > 0);
2356   match(Set dst (SubF src1 src2));
2357 
2358   format %{ "vsubss  $dst, $src1, $src2" %}
2359   ins_cost(150);
2360   ins_encode %{
2361     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2362   %}
2363   ins_pipe(pipe_slow);
2364 %}
2365 
2366 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
2367   predicate(UseAVX > 0);
2368   match(Set dst (SubF src1 (LoadF src2)));
2369 
2370   format %{ "vsubss  $dst, $src1, $src2" %}
2371   ins_cost(150);
2372   ins_encode %{
2373     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2374   %}
2375   ins_pipe(pipe_slow);
2376 %}
2377 
2378 instruct subF_reg_imm(regF dst, regF src, immF con) %{
2379   predicate(UseAVX > 0);
2380   match(Set dst (SubF src con));
2381 
2382   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2383   ins_cost(150);
2384   ins_encode %{
2385     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2386   %}
2387   ins_pipe(pipe_slow);
2388 %}
2389 
2390 instruct subD_reg(regD dst, regD src) %{
2391   predicate((UseSSE>=2) && (UseAVX == 0));
2392   match(Set dst (SubD dst src));
2393 
2394   format %{ "subsd   $dst, $src" %}
2395   ins_cost(150);
2396   ins_encode %{
2397     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
2398   %}
2399   ins_pipe(pipe_slow);
2400 %}
2401 
2402 instruct subD_mem(regD dst, memory src) %{
2403   predicate((UseSSE>=2) && (UseAVX == 0));
2404   match(Set dst (SubD dst (LoadD src)));
2405 
2406   format %{ "subsd   $dst, $src" %}
2407   ins_cost(150);
2408   ins_encode %{
2409     __ subsd($dst$$XMMRegister, $src$$Address);
2410   %}
2411   ins_pipe(pipe_slow);
2412 %}
2413 
2414 instruct subD_imm(regD dst, immD con) %{
2415   predicate((UseSSE>=2) && (UseAVX == 0));
2416   match(Set dst (SubD dst con));
2417   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2418   ins_cost(150);
2419   ins_encode %{
2420     __ subsd($dst$$XMMRegister, $constantaddress($con));
2421   %}
2422   ins_pipe(pipe_slow);
2423 %}
2424 
2425 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
2426   predicate(UseAVX > 0);
2427   match(Set dst (SubD src1 src2));
2428 
2429   format %{ "vsubsd  $dst, $src1, $src2" %}
2430   ins_cost(150);
2431   ins_encode %{
2432     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2433   %}
2434   ins_pipe(pipe_slow);
2435 %}
2436 
2437 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
2438   predicate(UseAVX > 0);
2439   match(Set dst (SubD src1 (LoadD src2)));
2440 
2441   format %{ "vsubsd  $dst, $src1, $src2" %}
2442   ins_cost(150);
2443   ins_encode %{
2444     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2445   %}
2446   ins_pipe(pipe_slow);
2447 %}
2448 
2449 instruct subD_reg_imm(regD dst, regD src, immD con) %{
2450   predicate(UseAVX > 0);
2451   match(Set dst (SubD src con));
2452 
2453   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2454   ins_cost(150);
2455   ins_encode %{
2456     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2457   %}
2458   ins_pipe(pipe_slow);
2459 %}
2460 
2461 instruct mulF_reg(regF dst, regF src) %{
2462   predicate((UseSSE>=1) && (UseAVX == 0));
2463   match(Set dst (MulF dst src));
2464 
2465   format %{ "mulss   $dst, $src" %}
2466   ins_cost(150);
2467   ins_encode %{
2468     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
2469   %}
2470   ins_pipe(pipe_slow);
2471 %}
2472 
2473 instruct mulF_mem(regF dst, memory src) %{
2474   predicate((UseSSE>=1) && (UseAVX == 0));
2475   match(Set dst (MulF dst (LoadF src)));
2476 
2477   format %{ "mulss   $dst, $src" %}
2478   ins_cost(150);
2479   ins_encode %{
2480     __ mulss($dst$$XMMRegister, $src$$Address);
2481   %}
2482   ins_pipe(pipe_slow);
2483 %}
2484 
2485 instruct mulF_imm(regF dst, immF con) %{
2486   predicate((UseSSE>=1) && (UseAVX == 0));
2487   match(Set dst (MulF dst con));
2488   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2489   ins_cost(150);
2490   ins_encode %{
2491     __ mulss($dst$$XMMRegister, $constantaddress($con));
2492   %}
2493   ins_pipe(pipe_slow);
2494 %}
2495 
2496 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
2497   predicate(UseAVX > 0);
2498   match(Set dst (MulF src1 src2));
2499 
2500   format %{ "vmulss  $dst, $src1, $src2" %}
2501   ins_cost(150);
2502   ins_encode %{
2503     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2504   %}
2505   ins_pipe(pipe_slow);
2506 %}
2507 
2508 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
2509   predicate(UseAVX > 0);
2510   match(Set dst (MulF src1 (LoadF src2)));
2511 
2512   format %{ "vmulss  $dst, $src1, $src2" %}
2513   ins_cost(150);
2514   ins_encode %{
2515     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2516   %}
2517   ins_pipe(pipe_slow);
2518 %}
2519 
2520 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
2521   predicate(UseAVX > 0);
2522   match(Set dst (MulF src con));
2523 
2524   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2525   ins_cost(150);
2526   ins_encode %{
2527     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2528   %}
2529   ins_pipe(pipe_slow);
2530 %}
2531 
2532 instruct mulD_reg(regD dst, regD src) %{
2533   predicate((UseSSE>=2) && (UseAVX == 0));
2534   match(Set dst (MulD dst src));
2535 
2536   format %{ "mulsd   $dst, $src" %}
2537   ins_cost(150);
2538   ins_encode %{
2539     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
2540   %}
2541   ins_pipe(pipe_slow);
2542 %}
2543 
2544 instruct mulD_mem(regD dst, memory src) %{
2545   predicate((UseSSE>=2) && (UseAVX == 0));
2546   match(Set dst (MulD dst (LoadD src)));
2547 
2548   format %{ "mulsd   $dst, $src" %}
2549   ins_cost(150);
2550   ins_encode %{
2551     __ mulsd($dst$$XMMRegister, $src$$Address);
2552   %}
2553   ins_pipe(pipe_slow);
2554 %}
2555 
2556 instruct mulD_imm(regD dst, immD con) %{
2557   predicate((UseSSE>=2) && (UseAVX == 0));
2558   match(Set dst (MulD dst con));
2559   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2560   ins_cost(150);
2561   ins_encode %{
2562     __ mulsd($dst$$XMMRegister, $constantaddress($con));
2563   %}
2564   ins_pipe(pipe_slow);
2565 %}
2566 
2567 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
2568   predicate(UseAVX > 0);
2569   match(Set dst (MulD src1 src2));
2570 
2571   format %{ "vmulsd  $dst, $src1, $src2" %}
2572   ins_cost(150);
2573   ins_encode %{
2574     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2575   %}
2576   ins_pipe(pipe_slow);
2577 %}
2578 
2579 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
2580   predicate(UseAVX > 0);
2581   match(Set dst (MulD src1 (LoadD src2)));
2582 
2583   format %{ "vmulsd  $dst, $src1, $src2" %}
2584   ins_cost(150);
2585   ins_encode %{
2586     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2587   %}
2588   ins_pipe(pipe_slow);
2589 %}
2590 
2591 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
2592   predicate(UseAVX > 0);
2593   match(Set dst (MulD src con));
2594 
2595   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2596   ins_cost(150);
2597   ins_encode %{
2598     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2599   %}
2600   ins_pipe(pipe_slow);
2601 %}
2602 
2603 instruct divF_reg(regF dst, regF src) %{
2604   predicate((UseSSE>=1) && (UseAVX == 0));
2605   match(Set dst (DivF dst src));
2606 
2607   format %{ "divss   $dst, $src" %}
2608   ins_cost(150);
2609   ins_encode %{
2610     __ divss($dst$$XMMRegister, $src$$XMMRegister);
2611   %}
2612   ins_pipe(pipe_slow);
2613 %}
2614 
2615 instruct divF_mem(regF dst, memory src) %{
2616   predicate((UseSSE>=1) && (UseAVX == 0));
2617   match(Set dst (DivF dst (LoadF src)));
2618 
2619   format %{ "divss   $dst, $src" %}
2620   ins_cost(150);
2621   ins_encode %{
2622     __ divss($dst$$XMMRegister, $src$$Address);
2623   %}
2624   ins_pipe(pipe_slow);
2625 %}
2626 
2627 instruct divF_imm(regF dst, immF con) %{
2628   predicate((UseSSE>=1) && (UseAVX == 0));
2629   match(Set dst (DivF dst con));
2630   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2631   ins_cost(150);
2632   ins_encode %{
2633     __ divss($dst$$XMMRegister, $constantaddress($con));
2634   %}
2635   ins_pipe(pipe_slow);
2636 %}
2637 
2638 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
2639   predicate(UseAVX > 0);
2640   match(Set dst (DivF src1 src2));
2641 
2642   format %{ "vdivss  $dst, $src1, $src2" %}
2643   ins_cost(150);
2644   ins_encode %{
2645     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2646   %}
2647   ins_pipe(pipe_slow);
2648 %}
2649 
2650 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
2651   predicate(UseAVX > 0);
2652   match(Set dst (DivF src1 (LoadF src2)));
2653 
2654   format %{ "vdivss  $dst, $src1, $src2" %}
2655   ins_cost(150);
2656   ins_encode %{
2657     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2658   %}
2659   ins_pipe(pipe_slow);
2660 %}
2661 
2662 instruct divF_reg_imm(regF dst, regF src, immF con) %{
2663   predicate(UseAVX > 0);
2664   match(Set dst (DivF src con));
2665 
2666   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2667   ins_cost(150);
2668   ins_encode %{
2669     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2670   %}
2671   ins_pipe(pipe_slow);
2672 %}
2673 
2674 instruct divD_reg(regD dst, regD src) %{
2675   predicate((UseSSE>=2) && (UseAVX == 0));
2676   match(Set dst (DivD dst src));
2677 
2678   format %{ "divsd   $dst, $src" %}
2679   ins_cost(150);
2680   ins_encode %{
2681     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
2682   %}
2683   ins_pipe(pipe_slow);
2684 %}
2685 
2686 instruct divD_mem(regD dst, memory src) %{
2687   predicate((UseSSE>=2) && (UseAVX == 0));
2688   match(Set dst (DivD dst (LoadD src)));
2689 
2690   format %{ "divsd   $dst, $src" %}
2691   ins_cost(150);
2692   ins_encode %{
2693     __ divsd($dst$$XMMRegister, $src$$Address);
2694   %}
2695   ins_pipe(pipe_slow);
2696 %}
2697 
2698 instruct divD_imm(regD dst, immD con) %{
2699   predicate((UseSSE>=2) && (UseAVX == 0));
2700   match(Set dst (DivD dst con));
2701   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2702   ins_cost(150);
2703   ins_encode %{
2704     __ divsd($dst$$XMMRegister, $constantaddress($con));
2705   %}
2706   ins_pipe(pipe_slow);
2707 %}
2708 
2709 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
2710   predicate(UseAVX > 0);
2711   match(Set dst (DivD src1 src2));
2712 
2713   format %{ "vdivsd  $dst, $src1, $src2" %}
2714   ins_cost(150);
2715   ins_encode %{
2716     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2717   %}
2718   ins_pipe(pipe_slow);
2719 %}
2720 
2721 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
2722   predicate(UseAVX > 0);
2723   match(Set dst (DivD src1 (LoadD src2)));
2724 
2725   format %{ "vdivsd  $dst, $src1, $src2" %}
2726   ins_cost(150);
2727   ins_encode %{
2728     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2729   %}
2730   ins_pipe(pipe_slow);
2731 %}
2732 
2733 instruct divD_reg_imm(regD dst, regD src, immD con) %{
2734   predicate(UseAVX > 0);
2735   match(Set dst (DivD src con));
2736 
2737   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2738   ins_cost(150);
2739   ins_encode %{
2740     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2741   %}
2742   ins_pipe(pipe_slow);
2743 %}
2744 
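     // Float/double abs and neg are implemented by AND-ing or XOR-ing the value
     // with a sign-bit mask (0x7fffffff... / 0x80000000... patterns) loaded from
     // memory via ExternalAddress.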
2745 instruct absF_reg(regF dst) %{
2746   predicate((UseSSE>=1) && (UseAVX == 0));
2747   match(Set dst (AbsF dst));
2748   ins_cost(150);
2749   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
2750   ins_encode %{
2751     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
2752   %}
2753   ins_pipe(pipe_slow);
2754 %}
2755 
2756 instruct absF_reg_reg(regF dst, regF src) %{
2757   predicate(VM_Version::supports_avxonly());
2758   match(Set dst (AbsF src));
2759   ins_cost(150);
2760   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2761   ins_encode %{
2762     int vector_len = 0;
2763     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2764               ExternalAddress(float_signmask()), vector_len);
2765   %}
2766   ins_pipe(pipe_slow);
2767 %}
2768 
2769 #ifdef _LP64
2770 instruct absF_reg_reg_evex(regF dst, regF src) %{
2771   predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
2772   match(Set dst (AbsF src));
2773   ins_cost(150);
2774   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2775   ins_encode %{
2776     int vector_len = 0;
2777     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2778               ExternalAddress(float_signmask()), vector_len);
2779   %}
2780   ins_pipe(pipe_slow);
2781 %}
2782 
2783 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
2784   predicate(VM_Version::supports_avx512novl());
2785   match(Set dst (AbsF src1));
2786   effect(TEMP src2);
2787   ins_cost(150);
2788   format %{ "vabsss  $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
2789   ins_encode %{
2790     int vector_len = 0;
2791     __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
2792               ExternalAddress(float_signmask()), vector_len);
2793   %}
2794   ins_pipe(pipe_slow);
2795 %}
2796 #else // _LP64
2797 instruct absF_reg_reg_evex(regF dst, regF src) %{
2798   predicate(UseAVX > 2);
2799   match(Set dst (AbsF src));
2800   ins_cost(150);
2801   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2802   ins_encode %{
2803     int vector_len = 0;
2804     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2805               ExternalAddress(float_signmask()), vector_len);
2806   %}
2807   ins_pipe(pipe_slow);
2808 %}
2809 #endif
2810 
2811 instruct absD_reg(regD dst) %{
2812   predicate((UseSSE>=2) && (UseAVX == 0));
2813   match(Set dst (AbsD dst));
2814   ins_cost(150);
2815   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
2816             "# abs double by sign masking" %}
2817   ins_encode %{
2818     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
2819   %}
2820   ins_pipe(pipe_slow);
2821 %}
2822 
2823 instruct absD_reg_reg(regD dst, regD src) %{
2824   predicate(VM_Version::supports_avxonly());
2825   match(Set dst (AbsD src));
2826   ins_cost(150);
2827   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
2828             "# abs double by sign masking" %}
2829   ins_encode %{
2830     int vector_len = 0;
2831     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2832               ExternalAddress(double_signmask()), vector_len);
2833   %}
2834   ins_pipe(pipe_slow);
2835 %}
2836 
2837 #ifdef _LP64
2838 instruct absD_reg_reg_evex(regD dst, regD src) %{
2839   predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
2840   match(Set dst (AbsD src));
2841   ins_cost(150);
2842   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
2843             "# abs double by sign masking" %}
2844   ins_encode %{
2845     int vector_len = 0;
2846     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2847               ExternalAddress(double_signmask()), vector_len);
2848   %}
2849   ins_pipe(pipe_slow);
2850 %}
2851 
2852 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
2853   predicate(VM_Version::supports_avx512novl());
2854   match(Set dst (AbsD src1));
2855   effect(TEMP src2);
2856   ins_cost(150);
2857   format %{ "vabssd  $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
2858   ins_encode %{
2859     int vector_len = 0;
2860     __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
2861               ExternalAddress(double_signmask()), vector_len);
2862   %}
2863   ins_pipe(pipe_slow);
2864 %}
2865 #else // _LP64
2866 instruct absD_reg_reg_evex(regD dst, regD src) %{
2867   predicate(UseAVX > 2);
2868   match(Set dst (AbsD src));
2869   ins_cost(150);
2870   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
2871             "# abs double by sign masking" %}
2872   ins_encode %{
2873     int vector_len = 0;
2874     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2875               ExternalAddress(double_signmask()), vector_len);
2876   %}
2877   ins_pipe(pipe_slow);
2878 %}
2879 #endif
2880 
2881 instruct negF_reg(regF dst) %{
2882   predicate((UseSSE>=1) && (UseAVX == 0));
2883   match(Set dst (NegF dst));
2884   ins_cost(150);
2885   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
2886   ins_encode %{
2887     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
2888   %}
2889   ins_pipe(pipe_slow);
2890 %}
2891 
2892 instruct negF_reg_reg(regF dst, regF src) %{
2893   predicate(UseAVX > 0);
2894   match(Set dst (NegF src));
2895   ins_cost(150);
2896   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
2897   ins_encode %{
2898     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
2899                  ExternalAddress(float_signflip()));
2900   %}
2901   ins_pipe(pipe_slow);
2902 %}
2903 
2904 instruct negD_reg(regD dst) %{
2905   predicate((UseSSE>=2) && (UseAVX == 0));
2906   match(Set dst (NegD dst));
2907   ins_cost(150);
2908   format %{ "xorpd   $dst, [0x8000000000000000]\t"
2909             "# neg double by sign flipping" %}
2910   ins_encode %{
2911     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
2912   %}
2913   ins_pipe(pipe_slow);
2914 %}
2915 
2916 instruct negD_reg_reg(regD dst, regD src) %{
2917   predicate(UseAVX > 0);
2918   match(Set dst (NegD src));
2919   ins_cost(150);
2920   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
2921             "# neg double by sign flipping" %}
2922   ins_encode %{
2923     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
2924                  ExternalAddress(double_signflip()));
2925   %}
2926   ins_pipe(pipe_slow);
2927 %}
2928 
2929 instruct sqrtF_reg(regF dst, regF src) %{
2930   predicate(UseSSE>=1);
2931   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
2932 
2933   format %{ "sqrtss  $dst, $src" %}
2934   ins_cost(150);
2935   ins_encode %{
2936     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
2937   %}
2938   ins_pipe(pipe_slow);
2939 %}
2940 
2941 instruct sqrtF_mem(regF dst, memory src) %{
2942   predicate(UseSSE>=1);
2943   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
2944 
2945   format %{ "sqrtss  $dst, $src" %}
2946   ins_cost(150);
2947   ins_encode %{
2948     __ sqrtss($dst$$XMMRegister, $src$$Address);
2949   %}
2950   ins_pipe(pipe_slow);
2951 %}
2952 
2953 instruct sqrtF_imm(regF dst, immF con) %{
2954   predicate(UseSSE>=1);
2955   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
2956   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2957   ins_cost(150);
2958   ins_encode %{
2959     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
2960   %}
2961   ins_pipe(pipe_slow);
2962 %}
2963 
2964 instruct sqrtD_reg(regD dst, regD src) %{
2965   predicate(UseSSE>=2);
2966   match(Set dst (SqrtD src));
2967 
2968   format %{ "sqrtsd  $dst, $src" %}
2969   ins_cost(150);
2970   ins_encode %{
2971     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
2972   %}
2973   ins_pipe(pipe_slow);
2974 %}
2975 
2976 instruct sqrtD_mem(regD dst, memory src) %{
2977   predicate(UseSSE>=2);
2978   match(Set dst (SqrtD (LoadD src)));
2979 
2980   format %{ "sqrtsd  $dst, $src" %}
2981   ins_cost(150);
2982   ins_encode %{
2983     __ sqrtsd($dst$$XMMRegister, $src$$Address);
2984   %}
2985   ins_pipe(pipe_slow);
2986 %}
2987 
2988 instruct sqrtD_imm(regD dst, immD con) %{
2989   predicate(UseSSE>=2);
2990   match(Set dst (SqrtD con));
2991   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2992   ins_cost(150);
2993   ins_encode %{
2994     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
2995   %}
2996   ins_pipe(pipe_slow);
2997 %}
2998 
2999 // ====================VECTOR INSTRUCTIONS=====================================
3000 
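     // Vector loads and stores are selected by the vector's memory size:
     // 4 bytes -> vecS (movd), 8 -> vecD (movq), 16 -> vecX (movdqu),
     // 32 -> vecY (vmovdqu) and 64 -> vecZ (evmovdqul).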
3001 // Load vectors (4 bytes long)
3002 instruct loadV4(vecS dst, memory mem) %{
3003   predicate(n->as_LoadVector()->memory_size() == 4);
3004   match(Set dst (LoadVector mem));
3005   ins_cost(125);
3006   format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
3007   ins_encode %{
3008     __ movdl($dst$$XMMRegister, $mem$$Address);
3009   %}
3010   ins_pipe( pipe_slow );
3011 %}
3012 
3013 // Load vectors (8 bytes long)
3014 instruct loadV8(vecD dst, memory mem) %{
3015   predicate(n->as_LoadVector()->memory_size() == 8);
3016   match(Set dst (LoadVector mem));
3017   ins_cost(125);
3018   format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
3019   ins_encode %{
3020     __ movq($dst$$XMMRegister, $mem$$Address);
3021   %}
3022   ins_pipe( pipe_slow );
3023 %}
3024 
3025 // Load vectors (16 bytes long)
3026 instruct loadV16(vecX dst, memory mem) %{
3027   predicate(n->as_LoadVector()->memory_size() == 16);
3028   match(Set dst (LoadVector mem));
3029   ins_cost(125);
3030   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
3031   ins_encode %{
3032     __ movdqu($dst$$XMMRegister, $mem$$Address);
3033   %}
3034   ins_pipe( pipe_slow );
3035 %}
3036 
3037 // Load vectors (32 bytes long)
3038 instruct loadV32(vecY dst, memory mem) %{
3039   predicate(n->as_LoadVector()->memory_size() == 32);
3040   match(Set dst (LoadVector mem));
3041   ins_cost(125);
3042   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
3043   ins_encode %{
3044     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
3045   %}
3046   ins_pipe( pipe_slow );
3047 %}
3048 
3049 // Load vectors (64 bytes long)
3050 instruct loadV64(vecZ dst, memory mem) %{
3051   predicate(n->as_LoadVector()->memory_size() == 64);
3052   match(Set dst (LoadVector mem));
3053   ins_cost(125);
3054   format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
3055   ins_encode %{
3056     int vector_len = 2;
3057     __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
3058   %}
3059   ins_pipe( pipe_slow );
3060 %}
3061 
3062 // Store vectors
3063 instruct storeV4(memory mem, vecS src) %{
3064   predicate(n->as_StoreVector()->memory_size() == 4);
3065   match(Set mem (StoreVector mem src));
3066   ins_cost(145);
3067   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
3068   ins_encode %{
3069     __ movdl($mem$$Address, $src$$XMMRegister);
3070   %}
3071   ins_pipe( pipe_slow );
3072 %}
3073 
3074 instruct storeV8(memory mem, vecD src) %{
3075   predicate(n->as_StoreVector()->memory_size() == 8);
3076   match(Set mem (StoreVector mem src));
3077   ins_cost(145);
3078   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
3079   ins_encode %{
3080     __ movq($mem$$Address, $src$$XMMRegister);
3081   %}
3082   ins_pipe( pipe_slow );
3083 %}
3084 
3085 instruct storeV16(memory mem, vecX src) %{
3086   predicate(n->as_StoreVector()->memory_size() == 16);
3087   match(Set mem (StoreVector mem src));
3088   ins_cost(145);
3089   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
3090   ins_encode %{
3091     __ movdqu($mem$$Address, $src$$XMMRegister);
3092   %}
3093   ins_pipe( pipe_slow );
3094 %}
3095 
3096 instruct storeV32(memory mem, vecY src) %{
3097   predicate(n->as_StoreVector()->memory_size() == 32);
3098   match(Set mem (StoreVector mem src));
3099   ins_cost(145);
3100   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
3101   ins_encode %{
3102     __ vmovdqu($mem$$Address, $src$$XMMRegister);
3103   %}
3104   ins_pipe( pipe_slow );
3105 %}
3106 
3107 instruct storeV64(memory mem, vecZ src) %{
3108   predicate(n->as_StoreVector()->memory_size() == 64);
3109   match(Set mem (StoreVector mem src));
3110   ins_cost(145);
3111   format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
3112   ins_encode %{
3113     int vector_len = 2;
3114     __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
3115   %}
3116   ins_pipe( pipe_slow );
3117 %}
3118 
3119 // ====================LEGACY REPLICATE=======================================
3120 
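     // These legacy forms broadcast a scalar into a vector on targets without
     // the AVX512VL/BW encodings: movd/movq puts the scalar in the low element,
     // punpcklbw/pshuflw/pshufd/punpcklqdq duplicate it across the 128-bit lane,
     // and vinserti128h/vinsertf128h copies the low lane into the high lane of a
     // 256-bit register.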
3121 instruct Repl4B_mem(vecS dst, memory mem) %{
3122   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3123   match(Set dst (ReplicateB (LoadB mem)));
3124   format %{ "punpcklbw $dst,$mem\n\t"
3125             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
3126   ins_encode %{
3127     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3128     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3129   %}
3130   ins_pipe( pipe_slow );
3131 %}
3132 
3133 instruct Repl8B_mem(vecD dst, memory mem) %{
3134   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3135   match(Set dst (ReplicateB (LoadB mem)));
3136   format %{ "punpcklbw $dst,$mem\n\t"
3137             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
3138   ins_encode %{
3139     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3140     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3141   %}
3142   ins_pipe( pipe_slow );
3143 %}
3144 
3145 instruct Repl16B(vecX dst, rRegI src) %{
3146   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3147   match(Set dst (ReplicateB src));
3148   format %{ "movd    $dst,$src\n\t"
3149             "punpcklbw $dst,$dst\n\t"
3150             "pshuflw $dst,$dst,0x00\n\t"
3151             "punpcklqdq $dst,$dst\t! replicate16B" %}
3152   ins_encode %{
3153     __ movdl($dst$$XMMRegister, $src$$Register);
3154     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3155     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3156     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3157   %}
3158   ins_pipe( pipe_slow );
3159 %}
3160 
3161 instruct Repl16B_mem(vecX dst, memory mem) %{
3162   predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3163   match(Set dst (ReplicateB (LoadB mem)));
3164   format %{ "punpcklbw $dst,$mem\n\t"
3165             "pshuflw $dst,$dst,0x00\n\t"
3166             "punpcklqdq $dst,$dst\t! replicate16B" %}
3167   ins_encode %{
3168     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3169     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3170     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3171   %}
3172   ins_pipe( pipe_slow );
3173 %}
3174 
3175 instruct Repl32B(vecY dst, rRegI src) %{
3176   predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
3177   match(Set dst (ReplicateB src));
3178   format %{ "movd    $dst,$src\n\t"
3179             "punpcklbw $dst,$dst\n\t"
3180             "pshuflw $dst,$dst,0x00\n\t"
3181             "punpcklqdq $dst,$dst\n\t"
3182             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
3183   ins_encode %{
3184     __ movdl($dst$$XMMRegister, $src$$Register);
3185     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3186     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3187     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3188     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3189   %}
3190   ins_pipe( pipe_slow );
3191 %}
3192 
3193 instruct Repl32B_mem(vecY dst, memory mem) %{
3194   predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
3195   match(Set dst (ReplicateB (LoadB mem)));
3196   format %{ "punpcklbw $dst,$mem\n\t"
3197             "pshuflw $dst,$dst,0x00\n\t"
3198             "punpcklqdq $dst,$dst\n\t"
3199             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
3200   ins_encode %{
3201     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3202     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3203     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3204     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3205   %}
3206   ins_pipe( pipe_slow );
3207 %}
3208 
3209 instruct Repl16B_imm(vecX dst, immI con) %{
3210   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3211   match(Set dst (ReplicateB con));
3212   format %{ "movq    $dst,[$constantaddress]\n\t"
3213             "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
3214   ins_encode %{
3215     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3216     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3217   %}
3218   ins_pipe( pipe_slow );
3219 %}
3220 
3221 instruct Repl32B_imm(vecY dst, immI con) %{
3222   predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
3223   match(Set dst (ReplicateB con));
3224   format %{ "movq    $dst,[$constantaddress]\n\t"
3225             "punpcklqdq $dst,$dst\n\t"
3226             "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
3227   ins_encode %{
3228     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3229     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3230     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3231   %}
3232   ins_pipe( pipe_slow );
3233 %}
3234 
3235 instruct Repl4S(vecD dst, rRegI src) %{
3236   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
3237   match(Set dst (ReplicateS src));
3238   format %{ "movd    $dst,$src\n\t"
3239             "pshuflw $dst,$dst,0x00\t! replicate4S" %}
3240   ins_encode %{
3241     __ movdl($dst$$XMMRegister, $src$$Register);
3242     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3243   %}
3244   ins_pipe( pipe_slow );
3245 %}
3246 
3247 instruct Repl4S_mem(vecD dst, memory mem) %{
3248   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3249   match(Set dst (ReplicateS (LoadS mem)));
3250   format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
3251   ins_encode %{
3252     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3253   %}
3254   ins_pipe( pipe_slow );
3255 %}
3256 
3257 instruct Repl8S(vecX dst, rRegI src) %{
3258   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
3259   match(Set dst (ReplicateS src));
3260   format %{ "movd    $dst,$src\n\t"
3261             "pshuflw $dst,$dst,0x00\n\t"
3262             "punpcklqdq $dst,$dst\t! replicate8S" %}
3263   ins_encode %{
3264     __ movdl($dst$$XMMRegister, $src$$Register);
3265     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3266     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3267   %}
3268   ins_pipe( pipe_slow );
3269 %}
3270 
3271 instruct Repl8S_mem(vecX dst, memory mem) %{
3272   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3273   match(Set dst (ReplicateS (LoadS mem)));
3274   format %{ "pshuflw $dst,$mem,0x00\n\t"
3275             "punpcklqdq $dst,$dst\t! replicate8S" %}
3276   ins_encode %{
3277     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3278     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3279   %}
3280   ins_pipe( pipe_slow );
3281 %}
3282 
3283 instruct Repl8S_imm(vecX dst, immI con) %{
3284   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
3285   match(Set dst (ReplicateS con));
3286   format %{ "movq    $dst,[$constantaddress]\n\t"
3287             "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
3288   ins_encode %{
3289     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3290     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3291   %}
3292   ins_pipe( pipe_slow );
3293 %}
3294 
3295 instruct Repl16S(vecY dst, rRegI src) %{
3296   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3297   match(Set dst (ReplicateS src));
3298   format %{ "movd    $dst,$src\n\t"
3299             "pshuflw $dst,$dst,0x00\n\t"
3300             "punpcklqdq $dst,$dst\n\t"
3301             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
3302   ins_encode %{
3303     __ movdl($dst$$XMMRegister, $src$$Register);
3304     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3305     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3306     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3307   %}
3308   ins_pipe( pipe_slow );
3309 %}
3310 
3311 instruct Repl16S_mem(vecY dst, memory mem) %{
3312   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3313   match(Set dst (ReplicateS (LoadS mem)));
3314   format %{ "pshuflw $dst,$mem,0x00\n\t"
3315             "punpcklqdq $dst,$dst\n\t"
3316             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
3317   ins_encode %{
3318     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3319     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3320     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3321   %}
3322   ins_pipe( pipe_slow );
3323 %}
3324 
3325 instruct Repl16S_imm(vecY dst, immI con) %{
3326   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3327   match(Set dst (ReplicateS con));
3328   format %{ "movq    $dst,[$constantaddress]\n\t"
3329             "punpcklqdq $dst,$dst\n\t"
3330             "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
3331   ins_encode %{
3332     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3333     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3334     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3335   %}
3336   ins_pipe( pipe_slow );
3337 %}
3338 
3339 instruct Repl4I(vecX dst, rRegI src) %{
3340   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3341   match(Set dst (ReplicateI src));
3342   format %{ "movd    $dst,$src\n\t"
3343             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
3344   ins_encode %{
3345     __ movdl($dst$$XMMRegister, $src$$Register);
3346     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3347   %}
3348   ins_pipe( pipe_slow );
3349 %}
3350 
3351 instruct Repl4I_mem(vecX dst, memory mem) %{
3352   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3353   match(Set dst (ReplicateI (LoadI mem)));
3354   format %{ "pshufd  $dst,$mem,0x00\t! replicate4I" %}
3355   ins_encode %{
3356     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3357   %}
3358   ins_pipe( pipe_slow );
3359 %}
3360 
3361 instruct Repl8I(vecY dst, rRegI src) %{
3362   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3363   match(Set dst (ReplicateI src));
3364   format %{ "movd    $dst,$src\n\t"
3365             "pshufd  $dst,$dst,0x00\n\t"
3366             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3367   ins_encode %{
3368     __ movdl($dst$$XMMRegister, $src$$Register);
3369     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3370     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3371   %}
3372   ins_pipe( pipe_slow );
3373 %}
3374 
3375 instruct Repl8I_mem(vecY dst, memory mem) %{
3376   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3377   match(Set dst (ReplicateI (LoadI mem)));
3378   format %{ "pshufd  $dst,$mem,0x00\n\t"
3379             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3380   ins_encode %{
3381     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3382     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3383   %}
3384   ins_pipe( pipe_slow );
3385 %}
3386 
3387 instruct Repl4I_imm(vecX dst, immI con) %{
3388   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3389   match(Set dst (ReplicateI con));
3390   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
3391             "punpcklqdq $dst,$dst" %}
3392   ins_encode %{
3393     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3394     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3395   %}
3396   ins_pipe( pipe_slow );
3397 %}
3398 
3399 instruct Repl8I_imm(vecY dst, immI con) %{
3400   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3401   match(Set dst (ReplicateI con));
3402   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
3403             "punpcklqdq $dst,$dst\n\t"
3404             "vinserti128h $dst,$dst,$dst" %}
3405   ins_encode %{
3406     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3407     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3408     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3409   %}
3410   ins_pipe( pipe_slow );
3411 %}
3412 
3413 // A long can be loaded into an xmm register directly from memory.
3414 instruct Repl2L_mem(vecX dst, memory mem) %{
3415   predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
3416   match(Set dst (ReplicateL (LoadL mem)));
3417   format %{ "movq    $dst,$mem\n\t"
3418             "punpcklqdq $dst,$dst\t! replicate2L" %}
3419   ins_encode %{
3420     __ movq($dst$$XMMRegister, $mem$$Address);
3421     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3422   %}
3423   ins_pipe( pipe_slow );
3424 %}
3425 
3426 // Replicate long (8 byte) scalar to be vector
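     // On 64-bit the long is moved whole with movdq; on 32-bit it lives in a
     // register pair, so the low and high halves are moved separately and
     // merged with punpckldq before the quadword is duplicated.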
3427 #ifdef _LP64
3428 instruct Repl4L(vecY dst, rRegL src) %{
3429   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3430   match(Set dst (ReplicateL src));
3431   format %{ "movdq   $dst,$src\n\t"
3432             "punpcklqdq $dst,$dst\n\t"
3433             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3434   ins_encode %{
3435     __ movdq($dst$$XMMRegister, $src$$Register);
3436     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3437     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3438   %}
3439   ins_pipe( pipe_slow );
3440 %}
3441 #else // _LP64
3442 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
3443   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3444   match(Set dst (ReplicateL src));
3445   effect(TEMP dst, USE src, TEMP tmp);
3446   format %{ "movdl   $dst,$src.lo\n\t"
3447             "movdl   $tmp,$src.hi\n\t"
3448             "punpckldq $dst,$tmp\n\t"
3449             "punpcklqdq $dst,$dst\n\t"
3450             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3451   ins_encode %{
3452     __ movdl($dst$$XMMRegister, $src$$Register);
3453     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3454     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3455     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3456     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3457   %}
3458   ins_pipe( pipe_slow );
3459 %}
3460 #endif // _LP64
3461 
3462 instruct Repl4L_imm(vecY dst, immL con) %{
3463   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3464   match(Set dst (ReplicateL con));
3465   format %{ "movq    $dst,[$constantaddress]\n\t"
3466             "punpcklqdq $dst,$dst\n\t"
3467             "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
3468   ins_encode %{
3469     __ movq($dst$$XMMRegister, $constantaddress($con));
3470     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3471     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3472   %}
3473   ins_pipe( pipe_slow );
3474 %}
3475 
3476 instruct Repl4L_mem(vecY dst, memory mem) %{
3477   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3478   match(Set dst (ReplicateL (LoadL mem)));
3479   format %{ "movq    $dst,$mem\n\t"
3480             "punpcklqdq $dst,$dst\n\t"
3481             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3482   ins_encode %{
3483     __ movq($dst$$XMMRegister, $mem$$Address);
3484     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3485     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3486   %}
3487   ins_pipe( pipe_slow );
3488 %}
3489 
3490 instruct Repl2F_mem(vecD dst, memory mem) %{
3491   predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3492   match(Set dst (ReplicateF (LoadF mem)));
3493   format %{ "pshufd  $dst,$mem,0x00\t! replicate2F" %}
3494   ins_encode %{
3495     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3496   %}
3497   ins_pipe( pipe_slow );
3498 %}
3499 
3500 instruct Repl4F_mem(vecX dst, memory mem) %{
3501   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3502   match(Set dst (ReplicateF (LoadF mem)));
3503   format %{ "pshufd  $dst,$mem,0x00\t! replicate4F" %}
3504   ins_encode %{
3505     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3506   %}
3507   ins_pipe( pipe_slow );
3508 %}
3509 
3510 instruct Repl8F(vecY dst, regF src) %{
3511   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3512   match(Set dst (ReplicateF src));
3513   format %{ "pshufd  $dst,$src,0x00\n\t"
3514             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
3515   ins_encode %{
3516     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3517     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3518   %}
3519   ins_pipe( pipe_slow );
3520 %}
3521 
3522 instruct Repl8F_mem(vecY dst, memory mem) %{
3523   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
3524   match(Set dst (ReplicateF (LoadF mem)));
3525   format %{ "pshufd  $dst,$mem,0x00\n\t"
3526             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
3527   ins_encode %{
3528     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3529     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3530   %}
3531   ins_pipe( pipe_slow );
3532 %}
3533 
3534 instruct Repl2F_zero(vecD dst, immF0 zero) %{
3535   predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
3536   match(Set dst (ReplicateF zero));
3537   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
3538   ins_encode %{
3539     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3540   %}
3541   ins_pipe( fpu_reg_reg );
3542 %}
3543 
3544 instruct Repl4F_zero(vecX dst, immF0 zero) %{
3545   predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
3546   match(Set dst (ReplicateF zero));
3547   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
3548   ins_encode %{
3549     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3550   %}
3551   ins_pipe( fpu_reg_reg );
3552 %}
3553 
3554 instruct Repl8F_zero(vecY dst, immF0 zero) %{
3555   predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
3556   match(Set dst (ReplicateF zero));
3557   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
3558   ins_encode %{
3559     int vector_len = 1;
3560     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3561   %}
3562   ins_pipe( fpu_reg_reg );
3563 %}
3564 
3565 instruct Repl2D_mem(vecX dst, memory mem) %{
3566   predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3567   match(Set dst (ReplicateD (LoadD mem)));
3568   format %{ "pshufd  $dst,$mem,0x44\t! replicate2D" %}
3569   ins_encode %{
3570     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
3571   %}
3572   ins_pipe( pipe_slow );
3573 %}
3574 
3575 instruct Repl4D(vecY dst, regD src) %{
3576   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3577   match(Set dst (ReplicateD src));
3578   format %{ "pshufd  $dst,$src,0x44\n\t"
3579             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
3580   ins_encode %{
3581     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3582     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3583   %}
3584   ins_pipe( pipe_slow );
3585 %}
3586 
3587 instruct Repl4D_mem(vecY dst, memory mem) %{
3588   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
3589   match(Set dst (ReplicateD (LoadD mem)));
3590   format %{ "pshufd  $dst,$mem,0x44\n\t"
3591             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
3592   ins_encode %{
3593     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
3594     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3595   %}
3596   ins_pipe( pipe_slow );
3597 %}
3598 
3599 // Replicate double (8 byte) scalar zero to be vector
3600 instruct Repl2D_zero(vecX dst, immD0 zero) %{
3601   predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
3602   match(Set dst (ReplicateD zero));
3603   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
3604   ins_encode %{
3605     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
3606   %}
3607   ins_pipe( fpu_reg_reg );
3608 %}
3609 
3610 instruct Repl4D_zero(vecY dst, immD0 zero) %{
3611   predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
3612   match(Set dst (ReplicateD zero));
3613   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
3614   ins_encode %{
3615     int vector_len = 1;
3616     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3617   %}
3618   ins_pipe( fpu_reg_reg );
3619 %}
3620 
3621 // ====================GENERIC REPLICATE==========================================
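// These rules match the ReplicateB/S/I/L/F/D ideal nodes that the SuperWord
// pass emits when a vectorized loop broadcasts a single loop-invariant scalar
// into every lane.  A hedged Java-level sketch of the kind of loop that ends
// up here (illustrative only, not taken from the compiler sources):
//
//   static void fill(byte[] a, byte b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = b;              // b is broadcast into a vector register
//     }
//   }
//
// The scalar is first moved into an XMM register (movdl/movdq) and then
// shuffled/unpacked (punpcklbw, pshuflw, pshufd, punpcklqdq) so that every
// element of the destination holds the same value.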
3622 
3623 // Replicate byte scalar to be vector
3624 instruct Repl4B(vecS dst, rRegI src) %{
3625   predicate(n->as_Vector()->length() == 4);
3626   match(Set dst (ReplicateB src));
3627   format %{ "movd    $dst,$src\n\t"
3628             "punpcklbw $dst,$dst\n\t"
3629             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
3630   ins_encode %{
3631     __ movdl($dst$$XMMRegister, $src$$Register);
3632     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3633     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3634   %}
3635   ins_pipe( pipe_slow );
3636 %}
3637 
3638 instruct Repl8B(vecD dst, rRegI src) %{
3639   predicate(n->as_Vector()->length() == 8);
3640   match(Set dst (ReplicateB src));
3641   format %{ "movd    $dst,$src\n\t"
3642             "punpcklbw $dst,$dst\n\t"
3643             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
3644   ins_encode %{
3645     __ movdl($dst$$XMMRegister, $src$$Register);
3646     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3647     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3648   %}
3649   ins_pipe( pipe_slow );
3650 %}
3651 
3652 // Replicate byte scalar immediate to be vector by loading from const table.
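// The replicate4_imm()/replicate8_imm() helpers build a 32-bit or 64-bit
// constant by repeating the low 'width' bytes of the immediate; that packed
// constant is then loaded from the constant table.  A hedged worked example,
// assuming that packing behaviour:
//
//   replicate4_imm(0x1A, 1)   == 0x1A1A1A1A             // four bytes
//   replicate8_imm(0x1A, 1)   == 0x1A1A1A1A1A1A1A1AL    // eight bytes
//   replicate8_imm(0x1234, 2) == 0x1234123412341234L    // four shorts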
3653 instruct Repl4B_imm(vecS dst, immI con) %{
3654   predicate(n->as_Vector()->length() == 4);
3655   match(Set dst (ReplicateB con));
3656   format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
3657   ins_encode %{
3658     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
3659   %}
3660   ins_pipe( pipe_slow );
3661 %}
3662 
3663 instruct Repl8B_imm(vecD dst, immI con) %{
3664   predicate(n->as_Vector()->length() == 8);
3665   match(Set dst (ReplicateB con));
3666   format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
3667   ins_encode %{
3668     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3669   %}
3670   ins_pipe( pipe_slow );
3671 %}
3672 
3673 // Replicate byte scalar zero to be vector
3674 instruct Repl4B_zero(vecS dst, immI0 zero) %{
3675   predicate(n->as_Vector()->length() == 4);
3676   match(Set dst (ReplicateB zero));
3677   format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
3678   ins_encode %{
3679     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3680   %}
3681   ins_pipe( fpu_reg_reg );
3682 %}
3683 
3684 instruct Repl8B_zero(vecD dst, immI0 zero) %{
3685   predicate(n->as_Vector()->length() == 8);
3686   match(Set dst (ReplicateB zero));
3687   format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
3688   ins_encode %{
3689     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3690   %}
3691   ins_pipe( fpu_reg_reg );
3692 %}
3693 
3694 instruct Repl16B_zero(vecX dst, immI0 zero) %{
3695   predicate(n->as_Vector()->length() == 16);
3696   match(Set dst (ReplicateB zero));
3697   format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
3698   ins_encode %{
3699     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3700   %}
3701   ins_pipe( fpu_reg_reg );
3702 %}
3703 
3704 instruct Repl32B_zero(vecY dst, immI0 zero) %{
3705   predicate(n->as_Vector()->length() == 32);
3706   match(Set dst (ReplicateB zero));
3707   format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
3708   ins_encode %{
    // 256-bit vpxor requires AVX2; the MacroAssembler's vpxor falls back to vxorpd on plain AVX.
3710     int vector_len = 1;
3711     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3712   %}
3713   ins_pipe( fpu_reg_reg );
3714 %}
3715 
3716 // Replicate char/short (2 byte) scalar to be vector
3717 instruct Repl2S(vecS dst, rRegI src) %{
3718   predicate(n->as_Vector()->length() == 2);
3719   match(Set dst (ReplicateS src));
3720   format %{ "movd    $dst,$src\n\t"
3721             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
3722   ins_encode %{
3723     __ movdl($dst$$XMMRegister, $src$$Register);
3724     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3725   %}
3726   ins_pipe( fpu_reg_reg );
3727 %}
3728 
3729 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
3730 instruct Repl2S_imm(vecS dst, immI con) %{
3731   predicate(n->as_Vector()->length() == 2);
3732   match(Set dst (ReplicateS con));
3733   format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
3734   ins_encode %{
3735     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
3736   %}
3737   ins_pipe( fpu_reg_reg );
3738 %}
3739 
3740 instruct Repl4S_imm(vecD dst, immI con) %{
3741   predicate(n->as_Vector()->length() == 4);
3742   match(Set dst (ReplicateS con));
3743   format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
3744   ins_encode %{
3745     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3746   %}
3747   ins_pipe( fpu_reg_reg );
3748 %}
3749 
3750 // Replicate char/short (2 byte) scalar zero to be vector
3751 instruct Repl2S_zero(vecS dst, immI0 zero) %{
3752   predicate(n->as_Vector()->length() == 2);
3753   match(Set dst (ReplicateS zero));
3754   format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
3755   ins_encode %{
3756     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3757   %}
3758   ins_pipe( fpu_reg_reg );
3759 %}
3760 
3761 instruct Repl4S_zero(vecD dst, immI0 zero) %{
3762   predicate(n->as_Vector()->length() == 4);
3763   match(Set dst (ReplicateS zero));
3764   format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
3765   ins_encode %{
3766     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3767   %}
3768   ins_pipe( fpu_reg_reg );
3769 %}
3770 
3771 instruct Repl8S_zero(vecX dst, immI0 zero) %{
3772   predicate(n->as_Vector()->length() == 8);
3773   match(Set dst (ReplicateS zero));
3774   format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
3775   ins_encode %{
3776     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3777   %}
3778   ins_pipe( fpu_reg_reg );
3779 %}
3780 
3781 instruct Repl16S_zero(vecY dst, immI0 zero) %{
3782   predicate(n->as_Vector()->length() == 16);
3783   match(Set dst (ReplicateS zero));
3784   format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
3785   ins_encode %{
    // 256-bit vpxor requires AVX2; the MacroAssembler's vpxor falls back to vxorpd on plain AVX.
3787     int vector_len = 1;
3788     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3789   %}
3790   ins_pipe( fpu_reg_reg );
3791 %}
3792 
3793 // Replicate integer (4 byte) scalar to be vector
3794 instruct Repl2I(vecD dst, rRegI src) %{
3795   predicate(n->as_Vector()->length() == 2);
3796   match(Set dst (ReplicateI src));
3797   format %{ "movd    $dst,$src\n\t"
3798             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3799   ins_encode %{
3800     __ movdl($dst$$XMMRegister, $src$$Register);
3801     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3802   %}
3803   ins_pipe( fpu_reg_reg );
3804 %}
3805 
// The integer can be loaded into the xmm register directly from memory.
3807 instruct Repl2I_mem(vecD dst, memory mem) %{
3808   predicate(n->as_Vector()->length() == 2);
3809   match(Set dst (ReplicateI (LoadI mem)));
3810   format %{ "movd    $dst,$mem\n\t"
3811             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3812   ins_encode %{
3813     __ movdl($dst$$XMMRegister, $mem$$Address);
3814     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3815   %}
3816   ins_pipe( fpu_reg_reg );
3817 %}
3818 
3819 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
3820 instruct Repl2I_imm(vecD dst, immI con) %{
3821   predicate(n->as_Vector()->length() == 2);
3822   match(Set dst (ReplicateI con));
3823   format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
3824   ins_encode %{
3825     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3826   %}
3827   ins_pipe( fpu_reg_reg );
3828 %}
3829 
3830 // Replicate integer (4 byte) scalar zero to be vector
3831 instruct Repl2I_zero(vecD dst, immI0 zero) %{
3832   predicate(n->as_Vector()->length() == 2);
3833   match(Set dst (ReplicateI zero));
3834   format %{ "pxor    $dst,$dst\t! replicate2I" %}
3835   ins_encode %{
3836     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3837   %}
3838   ins_pipe( fpu_reg_reg );
3839 %}
3840 
3841 instruct Repl4I_zero(vecX dst, immI0 zero) %{
3842   predicate(n->as_Vector()->length() == 4);
3843   match(Set dst (ReplicateI zero));
3844   format %{ "pxor    $dst,$dst\t! replicate4I zero)" %}
3845   ins_encode %{
3846     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3847   %}
3848   ins_pipe( fpu_reg_reg );
3849 %}
3850 
3851 instruct Repl8I_zero(vecY dst, immI0 zero) %{
3852   predicate(n->as_Vector()->length() == 8);
3853   match(Set dst (ReplicateI zero));
3854   format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
3855   ins_encode %{
    // 256-bit vpxor requires AVX2; the MacroAssembler's vpxor falls back to vxorpd on plain AVX.
3857     int vector_len = 1;
3858     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3859   %}
3860   ins_pipe( fpu_reg_reg );
3861 %}
3862 
3863 // Replicate long (8 byte) scalar to be vector
3864 #ifdef _LP64
3865 instruct Repl2L(vecX dst, rRegL src) %{
3866   predicate(n->as_Vector()->length() == 2);
3867   match(Set dst (ReplicateL src));
3868   format %{ "movdq   $dst,$src\n\t"
3869             "punpcklqdq $dst,$dst\t! replicate2L" %}
3870   ins_encode %{
3871     __ movdq($dst$$XMMRegister, $src$$Register);
3872     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3873   %}
3874   ins_pipe( pipe_slow );
3875 %}
3876 #else // _LP64
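// On 32-bit, a long lives in a register pair (eRegL), so the low and high
// halves are moved into XMM registers separately (movdl), merged into a
// single 64-bit lane with punpckldq, and only then replicated.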
3877 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
3878   predicate(n->as_Vector()->length() == 2);
3879   match(Set dst (ReplicateL src));
3880   effect(TEMP dst, USE src, TEMP tmp);
3881   format %{ "movdl   $dst,$src.lo\n\t"
3882             "movdl   $tmp,$src.hi\n\t"
3883             "punpckldq $dst,$tmp\n\t"
3884             "punpcklqdq $dst,$dst\t! replicate2L"%}
3885   ins_encode %{
3886     __ movdl($dst$$XMMRegister, $src$$Register);
3887     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3888     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3889     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3890   %}
3891   ins_pipe( pipe_slow );
3892 %}
3893 #endif // _LP64
3894 
3895 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
3896 instruct Repl2L_imm(vecX dst, immL con) %{
3897   predicate(n->as_Vector()->length() == 2);
3898   match(Set dst (ReplicateL con));
3899   format %{ "movq    $dst,[$constantaddress]\n\t"
3900             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
3901   ins_encode %{
3902     __ movq($dst$$XMMRegister, $constantaddress($con));
3903     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3904   %}
3905   ins_pipe( pipe_slow );
3906 %}
3907 
3908 // Replicate long (8 byte) scalar zero to be vector
3909 instruct Repl2L_zero(vecX dst, immL0 zero) %{
3910   predicate(n->as_Vector()->length() == 2);
3911   match(Set dst (ReplicateL zero));
3912   format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
3913   ins_encode %{
3914     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3915   %}
3916   ins_pipe( fpu_reg_reg );
3917 %}
3918 
3919 instruct Repl4L_zero(vecY dst, immL0 zero) %{
3920   predicate(n->as_Vector()->length() == 4);
3921   match(Set dst (ReplicateL zero));
3922   format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
3923   ins_encode %{
    // 256-bit vpxor requires AVX2; the MacroAssembler's vpxor falls back to vxorpd on plain AVX.
3925     int vector_len = 1;
3926     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3927   %}
3928   ins_pipe( fpu_reg_reg );
3929 %}
3930 
3931 // Replicate float (4 byte) scalar to be vector
3932 instruct Repl2F(vecD dst, regF src) %{
3933   predicate(n->as_Vector()->length() == 2);
3934   match(Set dst (ReplicateF src));
3935   format %{ "pshufd  $dst,$dst,0x00\t! replicate2F" %}
3936   ins_encode %{
3937     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3938   %}
3939   ins_pipe( fpu_reg_reg );
3940 %}
3941 
3942 instruct Repl4F(vecX dst, regF src) %{
3943   predicate(n->as_Vector()->length() == 4);
3944   match(Set dst (ReplicateF src));
3945   format %{ "pshufd  $dst,$dst,0x00\t! replicate4F" %}
3946   ins_encode %{
3947     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3948   %}
3949   ins_pipe( pipe_slow );
3950 %}
3951 
3952 // Replicate double (8 bytes) scalar to be vector
3953 instruct Repl2D(vecX dst, regD src) %{
3954   predicate(n->as_Vector()->length() == 2);
3955   match(Set dst (ReplicateD src));
3956   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
3957   ins_encode %{
3958     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3959   %}
3960   ins_pipe( pipe_slow );
3961 %}
3962 
3963 // ====================EVEX REPLICATE=============================================
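// The evpbroadcast* forms below take a vector_len argument; the values used
// here follow the assembler's AVX vector-length encoding, where 0 selects the
// 128-bit, 1 the 256-bit and 2 the 512-bit form (Assembler::AVX_128bit,
// AVX_256bit, AVX_512bit).  The 128/256-bit variants are guarded by AVX512VL
// (plus AVX512BW for byte/short broadcasts), the 512-bit variants by the base
// AVX-512 checks (UseAVX > 2 / supports_avx512bw).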
3964 
3965 instruct Repl4B_mem_evex(vecS dst, memory mem) %{
3966   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
3967   match(Set dst (ReplicateB (LoadB mem)));
3968   format %{ "vpbroadcastb  $dst,$mem\t! replicate4B" %}
3969   ins_encode %{
3970     int vector_len = 0;
3971     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3972   %}
3973   ins_pipe( pipe_slow );
3974 %}
3975 
3976 instruct Repl8B_mem_evex(vecD dst, memory mem) %{
3977   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
3978   match(Set dst (ReplicateB (LoadB mem)));
3979   format %{ "vpbroadcastb  $dst,$mem\t! replicate8B" %}
3980   ins_encode %{
3981     int vector_len = 0;
3982     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3983   %}
3984   ins_pipe( pipe_slow );
3985 %}
3986 
3987 instruct Repl16B_evex(vecX dst, rRegI src) %{
3988   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3989   match(Set dst (ReplicateB src));
3990   format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
3991   ins_encode %{
    int vector_len = 0;
3993     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
3994   %}
3995   ins_pipe( pipe_slow );
3996 %}
3997 
3998 instruct Repl16B_mem_evex(vecX dst, memory mem) %{
3999   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
4000   match(Set dst (ReplicateB (LoadB mem)));
4001   format %{ "vpbroadcastb  $dst,$mem\t! replicate16B" %}
4002   ins_encode %{
4003     int vector_len = 0;
4004     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
4005   %}
4006   ins_pipe( pipe_slow );
4007 %}
4008 
4009 instruct Repl32B_evex(vecY dst, rRegI src) %{
4010   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
4011   match(Set dst (ReplicateB src));
4012   format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
4013   ins_encode %{
    int vector_len = 1;
4015     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
4016   %}
4017   ins_pipe( pipe_slow );
4018 %}
4019 
4020 instruct Repl32B_mem_evex(vecY dst, memory mem) %{
4021   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
4022   match(Set dst (ReplicateB (LoadB mem)));
4023   format %{ "vpbroadcastb  $dst,$mem\t! replicate32B" %}
4024   ins_encode %{
4025     int vector_len = 1;
4026     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
4027   %}
4028   ins_pipe( pipe_slow );
4029 %}
4030 
4031 instruct Repl64B_evex(vecZ dst, rRegI src) %{
4032   predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
4033   match(Set dst (ReplicateB src));
4034   format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
4035   ins_encode %{
    int vector_len = 2;
4037     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
4038   %}
4039   ins_pipe( pipe_slow );
4040 %}
4041 
4042 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
4043   predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
4044   match(Set dst (ReplicateB (LoadB mem)));
4045   format %{ "vpbroadcastb  $dst,$mem\t! replicate64B" %}
4046   ins_encode %{
4047     int vector_len = 2;
4048     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
4049   %}
4050   ins_pipe( pipe_slow );
4051 %}
4052 
4053 instruct Repl16B_imm_evex(vecX dst, immI con) %{
4054   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
4055   match(Set dst (ReplicateB con));
4056   format %{ "movq    $dst,[$constantaddress]\n\t"
4057             "vpbroadcastb $dst,$dst\t! replicate16B" %}
4058   ins_encode %{
    int vector_len = 0;
4060     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
4061     __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4062   %}
4063   ins_pipe( pipe_slow );
4064 %}
4065 
4066 instruct Repl32B_imm_evex(vecY dst, immI con) %{
4067   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
4068   match(Set dst (ReplicateB con));
4069   format %{ "movq    $dst,[$constantaddress]\n\t"
4070             "vpbroadcastb $dst,$dst\t! replicate32B" %}
4071   ins_encode %{
    int vector_len = 1;
4073     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
4074     __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4075   %}
4076   ins_pipe( pipe_slow );
4077 %}
4078 
4079 instruct Repl64B_imm_evex(vecZ dst, immI con) %{
4080   predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
4081   match(Set dst (ReplicateB con));
4082   format %{ "movq    $dst,[$constantaddress]\n\t"
4083             "vpbroadcastb $dst,$dst\t! upper replicate64B" %}
4084   ins_encode %{
    int vector_len = 2;
4086     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
4087     __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4088   %}
4089   ins_pipe( pipe_slow );
4090 %}
4091 
4092 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
4093   predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
4094   match(Set dst (ReplicateB zero));
4095   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate64B zero" %}
4096   ins_encode %{
    // 512-bit vpxor requires EVEX (AVX-512F); the UseAVX > 2 predicate guarantees it here.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4100   %}
4101   ins_pipe( fpu_reg_reg );
4102 %}
4103 
4104 instruct Repl4S_evex(vecD dst, rRegI src) %{
4105   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
4106   match(Set dst (ReplicateS src));
4107   format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
4108   ins_encode %{
    int vector_len = 0;
4110     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
4111   %}
4112   ins_pipe( pipe_slow );
4113 %}
4114 
4115 instruct Repl4S_mem_evex(vecD dst, memory mem) %{
4116   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
4117   match(Set dst (ReplicateS (LoadS mem)));
4118   format %{ "vpbroadcastw  $dst,$mem\t! replicate4S" %}
4119   ins_encode %{
4120     int vector_len = 0;
4121     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
4122   %}
4123   ins_pipe( pipe_slow );
4124 %}
4125 
4126 instruct Repl8S_evex(vecX dst, rRegI src) %{
4127   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
4128   match(Set dst (ReplicateS src));
4129   format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
4130   ins_encode %{
    int vector_len = 0;
4132     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
4133   %}
4134   ins_pipe( pipe_slow );
4135 %}
4136 
4137 instruct Repl8S_mem_evex(vecX dst, memory mem) %{
4138   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
4139   match(Set dst (ReplicateS (LoadS mem)));
4140   format %{ "vpbroadcastw  $dst,$mem\t! replicate8S" %}
4141   ins_encode %{
4142     int vector_len = 0;
4143     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
4144   %}
4145   ins_pipe( pipe_slow );
4146 %}
4147 
4148 instruct Repl16S_evex(vecY dst, rRegI src) %{
4149   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
4150   match(Set dst (ReplicateS src));
4151   format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
4152   ins_encode %{
    int vector_len = 1;
4154     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
4155   %}
4156   ins_pipe( pipe_slow );
4157 %}
4158 
4159 instruct Repl16S_mem_evex(vecY dst, memory mem) %{
4160   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
4161   match(Set dst (ReplicateS (LoadS mem)));
4162   format %{ "vpbroadcastw  $dst,$mem\t! replicate16S" %}
4163   ins_encode %{
4164     int vector_len = 1;
4165     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
4166   %}
4167   ins_pipe( pipe_slow );
4168 %}
4169 
4170 instruct Repl32S_evex(vecZ dst, rRegI src) %{
4171   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
4172   match(Set dst (ReplicateS src));
4173   format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
4174   ins_encode %{
    int vector_len = 2;
4176     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
4177   %}
4178   ins_pipe( pipe_slow );
4179 %}
4180 
4181 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
4182   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
4183   match(Set dst (ReplicateS (LoadS mem)));
4184   format %{ "vpbroadcastw  $dst,$mem\t! replicate32S" %}
4185   ins_encode %{
4186     int vector_len = 2;
4187     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
4188   %}
4189   ins_pipe( pipe_slow );
4190 %}
4191 
4192 instruct Repl8S_imm_evex(vecX dst, immI con) %{
4193   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
4194   match(Set dst (ReplicateS con));
4195   format %{ "movq    $dst,[$constantaddress]\n\t"
4196             "vpbroadcastw $dst,$dst\t! replicate8S" %}
4197   ins_encode %{
    int vector_len = 0;
4199     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
4200     __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4201   %}
4202   ins_pipe( pipe_slow );
4203 %}
4204 
4205 instruct Repl16S_imm_evex(vecY dst, immI con) %{
4206   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
4207   match(Set dst (ReplicateS con));
4208   format %{ "movq    $dst,[$constantaddress]\n\t"
4209             "vpbroadcastw $dst,$dst\t! replicate16S" %}
4210   ins_encode %{
    int vector_len = 1;
4212     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
4213     __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4214   %}
4215   ins_pipe( pipe_slow );
4216 %}
4217 
4218 instruct Repl32S_imm_evex(vecZ dst, immI con) %{
4219   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
4220   match(Set dst (ReplicateS con));
4221   format %{ "movq    $dst,[$constantaddress]\n\t"
4222             "vpbroadcastw $dst,$dst\t! replicate32S" %}
4223   ins_encode %{
    int vector_len = 2;
4225     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
4226     __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4227   %}
4228   ins_pipe( pipe_slow );
4229 %}
4230 
4231 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
4232   predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
4233   match(Set dst (ReplicateS zero));
4234   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate32S zero" %}
4235   ins_encode %{
    // 512-bit vpxor requires EVEX (AVX-512F); the UseAVX > 2 predicate guarantees it here.
4237     int vector_len = 2;
4238     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4239   %}
4240   ins_pipe( fpu_reg_reg );
4241 %}
4242 
4243 instruct Repl4I_evex(vecX dst, rRegI src) %{
4244   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4245   match(Set dst (ReplicateI src));
4246   format %{ "vpbroadcastd  $dst,$src\t! replicate4I" %}
4247   ins_encode %{
4248     int vector_len = 0;
4249     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
4250   %}
4251   ins_pipe( pipe_slow );
4252 %}
4253 
4254 instruct Repl4I_mem_evex(vecX dst, memory mem) %{
4255   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4256   match(Set dst (ReplicateI (LoadI mem)));
4257   format %{ "vpbroadcastd  $dst,$mem\t! replicate4I" %}
4258   ins_encode %{
4259     int vector_len = 0;
4260     __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
4261   %}
4262   ins_pipe( pipe_slow );
4263 %}
4264 
4265 instruct Repl8I_evex(vecY dst, rRegI src) %{
4266   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4267   match(Set dst (ReplicateI src));
4268   format %{ "vpbroadcastd  $dst,$src\t! replicate8I" %}
4269   ins_encode %{
4270     int vector_len = 1;
4271     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
4272   %}
4273   ins_pipe( pipe_slow );
4274 %}
4275 
4276 instruct Repl8I_mem_evex(vecY dst, memory mem) %{
4277   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4278   match(Set dst (ReplicateI (LoadI mem)));
4279   format %{ "vpbroadcastd  $dst,$mem\t! replicate8I" %}
4280   ins_encode %{
4281     int vector_len = 1;
4282     __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
4283   %}
4284   ins_pipe( pipe_slow );
4285 %}
4286 
4287 instruct Repl16I_evex(vecZ dst, rRegI src) %{
4288   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4289   match(Set dst (ReplicateI src));
4290   format %{ "vpbroadcastd  $dst,$src\t! replicate16I" %}
4291   ins_encode %{
4292     int vector_len = 2;
4293     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
4294   %}
4295   ins_pipe( pipe_slow );
4296 %}
4297 
4298 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
4299   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4300   match(Set dst (ReplicateI (LoadI mem)));
4301   format %{ "vpbroadcastd  $dst,$mem\t! replicate16I" %}
4302   ins_encode %{
4303     int vector_len = 2;
4304     __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
4305   %}
4306   ins_pipe( pipe_slow );
4307 %}
4308 
4309 instruct Repl4I_imm_evex(vecX dst, immI con) %{
4310   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4311   match(Set dst (ReplicateI con));
4312   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
4313             "vpbroadcastd  $dst,$dst\t! replicate4I" %}
4314   ins_encode %{
4315     int vector_len = 0;
4316     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4317     __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4318   %}
4319   ins_pipe( pipe_slow );
4320 %}
4321 
4322 instruct Repl8I_imm_evex(vecY dst, immI con) %{
4323   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4324   match(Set dst (ReplicateI con));
4325   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
4326             "vpbroadcastd  $dst,$dst\t! replicate8I" %}
4327   ins_encode %{
4328     int vector_len = 1;
4329     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4330     __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4331   %}
4332   ins_pipe( pipe_slow );
4333 %}
4334 
4335 instruct Repl16I_imm_evex(vecZ dst, immI con) %{
4336   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4337   match(Set dst (ReplicateI con));
4338   format %{ "movq    $dst,[$constantaddress]\t! replicate16I($con)\n\t"
4339             "vpbroadcastd  $dst,$dst\t! replicate16I" %}
4340   ins_encode %{
4341     int vector_len = 2;
4342     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4343     __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4344   %}
4345   ins_pipe( pipe_slow );
4346 %}
4347 
4348 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
4349   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4350   match(Set dst (ReplicateI zero));
4351   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate16I zero" %}
4352   ins_encode %{
    // 512-bit vpxor requires EVEX (AVX-512F); the UseAVX > 2 predicate guarantees it here.
4354     int vector_len = 2;
4355     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4356   %}
4357   ins_pipe( fpu_reg_reg );
4358 %}
4359 
4360 // Replicate long (8 byte) scalar to be vector
4361 #ifdef _LP64
4362 instruct Repl4L_evex(vecY dst, rRegL src) %{
4363   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4364   match(Set dst (ReplicateL src));
4365   format %{ "vpbroadcastq  $dst,$src\t! replicate4L" %}
4366   ins_encode %{
4367     int vector_len = 1;
4368     __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
4369   %}
4370   ins_pipe( pipe_slow );
4371 %}
4372 
4373 instruct Repl8L_evex(vecZ dst, rRegL src) %{
4374   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4375   match(Set dst (ReplicateL src));
4376   format %{ "vpbroadcastq  $dst,$src\t! replicate8L" %}
4377   ins_encode %{
4378     int vector_len = 2;
4379     __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
4380   %}
4381   ins_pipe( pipe_slow );
4382 %}
4383 #else // _LP64
4384 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
4385   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4386   match(Set dst (ReplicateL src));
4387   effect(TEMP dst, USE src, TEMP tmp);
4388   format %{ "movdl   $dst,$src.lo\n\t"
4389             "movdl   $tmp,$src.hi\n\t"
4390             "punpckldq $dst,$tmp\n\t"
4391             "vpbroadcastq  $dst,$dst\t! replicate4L" %}
4392   ins_encode %{
4393     int vector_len = 1;
4394     __ movdl($dst$$XMMRegister, $src$$Register);
4395     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4396     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4397     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4398   %}
4399   ins_pipe( pipe_slow );
4400 %}
4401 
4402 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
4403   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4404   match(Set dst (ReplicateL src));
4405   effect(TEMP dst, USE src, TEMP tmp);
4406   format %{ "movdl   $dst,$src.lo\n\t"
4407             "movdl   $tmp,$src.hi\n\t"
4408             "punpckldq $dst,$tmp\n\t"
4409             "vpbroadcastq  $dst,$dst\t! replicate8L" %}
4410   ins_encode %{
4411     int vector_len = 2;
4412     __ movdl($dst$$XMMRegister, $src$$Register);
4413     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4414     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4415     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4416   %}
4417   ins_pipe( pipe_slow );
4418 %}
4419 #endif // _LP64
4420 
4421 instruct Repl4L_imm_evex(vecY dst, immL con) %{
4422   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4423   match(Set dst (ReplicateL con));
4424   format %{ "movq    $dst,[$constantaddress]\n\t"
4425             "vpbroadcastq  $dst,$dst\t! replicate4L" %}
4426   ins_encode %{
4427     int vector_len = 1;
4428     __ movq($dst$$XMMRegister, $constantaddress($con));
4429     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4430   %}
4431   ins_pipe( pipe_slow );
4432 %}
4433 
4434 instruct Repl8L_imm_evex(vecZ dst, immL con) %{
4435   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4436   match(Set dst (ReplicateL con));
4437   format %{ "movq    $dst,[$constantaddress]\n\t"
4438             "vpbroadcastq  $dst,$dst\t! replicate8L" %}
4439   ins_encode %{
4440     int vector_len = 2;
4441     __ movq($dst$$XMMRegister, $constantaddress($con));
4442     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4443   %}
4444   ins_pipe( pipe_slow );
4445 %}
4446 
4447 instruct Repl2L_mem_evex(vecX dst, memory mem) %{
4448   predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
4449   match(Set dst (ReplicateL (LoadL mem)));
4450   format %{ "vpbroadcastd  $dst,$mem\t! replicate2L" %}
4451   ins_encode %{
4452     int vector_len = 0;
4453     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
4454   %}
4455   ins_pipe( pipe_slow );
4456 %}
4457 
4458 instruct Repl4L_mem_evex(vecY dst, memory mem) %{
4459   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4460   match(Set dst (ReplicateL (LoadL mem)));
4461   format %{ "vpbroadcastd  $dst,$mem\t! replicate4L" %}
4462   ins_encode %{
4463     int vector_len = 1;
4464     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
4465   %}
4466   ins_pipe( pipe_slow );
4467 %}
4468 
4469 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
4470   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4471   match(Set dst (ReplicateL (LoadL mem)));
4472   format %{ "vpbroadcastd  $dst,$mem\t! replicate8L" %}
4473   ins_encode %{
4474     int vector_len = 2;
4475     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
4476   %}
4477   ins_pipe( pipe_slow );
4478 %}
4479 
4480 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
4481   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4482   match(Set dst (ReplicateL zero));
4483   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate8L zero" %}
4484   ins_encode %{
    // 512-bit vpxor requires EVEX (AVX-512F); the UseAVX > 2 predicate guarantees it here.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4488   %}
4489   ins_pipe( fpu_reg_reg );
4490 %}
4491 
4492 instruct Repl8F_evex(vecY dst, regF src) %{
4493   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4494   match(Set dst (ReplicateF src));
4495   format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
4496   ins_encode %{
4497     int vector_len = 1;
4498     __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4499   %}
4500   ins_pipe( pipe_slow );
4501 %}
4502 
4503 instruct Repl8F_mem_evex(vecY dst, memory mem) %{
4504   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4505   match(Set dst (ReplicateF (LoadF mem)));
4506   format %{ "vbroadcastss  $dst,$mem\t! replicate8F" %}
4507   ins_encode %{
4508     int vector_len = 1;
4509     __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
4510   %}
4511   ins_pipe( pipe_slow );
4512 %}
4513 
4514 instruct Repl16F_evex(vecZ dst, regF src) %{
4515   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4516   match(Set dst (ReplicateF src));
4517   format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
4518   ins_encode %{
4519     int vector_len = 2;
4520     __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4521   %}
4522   ins_pipe( pipe_slow );
4523 %}
4524 
4525 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
4526   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4527   match(Set dst (ReplicateF (LoadF mem)));
4528   format %{ "vbroadcastss  $dst,$mem\t! replicate16F" %}
4529   ins_encode %{
4530     int vector_len = 2;
4531     __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
4532   %}
4533   ins_pipe( pipe_slow );
4534 %}
4535 
4536 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
4537   predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
4538   match(Set dst (ReplicateF zero));
4539   format %{ "vpxor  $dst k0,$dst,$dst\t! replicate2F zero" %}
4540   ins_encode %{
    // Use vpxor in place of vxorps: the EVEX-encoded vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4544   %}
4545   ins_pipe( fpu_reg_reg );
4546 %}
4547 
4548 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
4549   predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
4550   match(Set dst (ReplicateF zero));
4551   format %{ "vpxor  $dst k0,$dst,$dst\t! replicate4F zero" %}
4552   ins_encode %{
    // Use vpxor in place of vxorps: the EVEX-encoded vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4556   %}
4557   ins_pipe( fpu_reg_reg );
4558 %}
4559 
4560 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
4561   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4562   match(Set dst (ReplicateF zero));
4563   format %{ "vpxor  $dst k0,$dst,$dst\t! replicate8F zero" %}
4564   ins_encode %{
    // Use vpxor in place of vxorps: the EVEX-encoded vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4568   %}
4569   ins_pipe( fpu_reg_reg );
4570 %}
4571 
4572 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
4573   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4574   match(Set dst (ReplicateF zero));
4575   format %{ "vpxor  $dst k0,$dst,$dst\t! replicate16F zero" %}
4576   ins_encode %{
    // Use vpxor in place of vxorps: the EVEX-encoded vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4580   %}
4581   ins_pipe( fpu_reg_reg );
4582 %}
4583 
4584 instruct Repl4D_evex(vecY dst, regD src) %{
4585   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4586   match(Set dst (ReplicateD src));
4587   format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
4588   ins_encode %{
4589     int vector_len = 1;
4590     __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4591   %}
4592   ins_pipe( pipe_slow );
4593 %}
4594 
4595 instruct Repl4D_mem_evex(vecY dst, memory mem) %{
4596   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4597   match(Set dst (ReplicateD (LoadD mem)));
4598   format %{ "vbroadcastsd  $dst,$mem\t! replicate4D" %}
4599   ins_encode %{
4600     int vector_len = 1;
4601     __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
4602   %}
4603   ins_pipe( pipe_slow );
4604 %}
4605 
4606 instruct Repl8D_evex(vecZ dst, regD src) %{
4607   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4608   match(Set dst (ReplicateD src));
4609   format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
4610   ins_encode %{
4611     int vector_len = 2;
4612     __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4613   %}
4614   ins_pipe( pipe_slow );
4615 %}
4616 
4617 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
4618   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4619   match(Set dst (ReplicateD (LoadD mem)));
4620   format %{ "vbroadcastsd  $dst,$mem\t! replicate8D" %}
4621   ins_encode %{
4622     int vector_len = 2;
4623     __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
4624   %}
4625   ins_pipe( pipe_slow );
4626 %}
4627 
4628 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
4629   predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
4630   match(Set dst (ReplicateD zero));
4631   format %{ "vpxor  $dst k0,$dst,$dst\t! replicate2D zero" %}
4632   ins_encode %{
    // Use vpxor in place of vxorpd: the EVEX-encoded vxorpd requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4636   %}
4637   ins_pipe( fpu_reg_reg );
4638 %}
4639 
4640 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
4641   predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
4642   match(Set dst (ReplicateD zero));
4643   format %{ "vpxor  $dst k0,$dst,$dst\t! replicate4D zero" %}
4644   ins_encode %{
    // Use vpxor in place of vxorpd: the EVEX-encoded vxorpd requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4648   %}
4649   ins_pipe( fpu_reg_reg );
4650 %}
4651 
4652 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
4653   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4654   match(Set dst (ReplicateD zero));
4655   format %{ "vpxor  $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
4656   ins_encode %{
    // Use vpxor in place of vxorpd: the EVEX-encoded vxorpd requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4660   %}
4661   ins_pipe( fpu_reg_reg );
4662 %}
4663 
4664 // ====================REDUCTION ARITHMETIC=======================================
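// These rules match the AddReductionVI/VL/VF ideal nodes: 'src1' carries the
// scalar accumulator and 'src2' is the vector whose lanes are folded into it,
// so the result is src1 + src2[0] + ... + src2[n-1].  A hedged Java-level
// sketch of a loop that reduces this way (illustrative only):
//
//   static int sum(int[] a) {
//     int s = 0;
//     for (int i = 0; i < a.length; i++) {
//       s += a[i];             // lanes are summed, then added to s
//     }
//     return s;
//   }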
4665 
4666 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4667   predicate(UseSSE > 2 && UseAVX == 0);
4668   match(Set dst (AddReductionVI src1 src2));
4669   effect(TEMP tmp2, TEMP tmp);
4670   format %{ "movdqu  $tmp2,$src2\n\t"
4671             "phaddd  $tmp2,$tmp2\n\t"
4672             "movd    $tmp,$src1\n\t"
4673             "paddd   $tmp,$tmp2\n\t"
4674             "movd    $dst,$tmp\t! add reduction2I" %}
4675   ins_encode %{
4676     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
4677     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4678     __ movdl($tmp$$XMMRegister, $src1$$Register);
4679     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
4680     __ movdl($dst$$Register, $tmp$$XMMRegister);
4681   %}
4682   ins_pipe( pipe_slow );
4683 %}
4684 
4685 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4686   predicate(VM_Version::supports_avxonly());
4687   match(Set dst (AddReductionVI src1 src2));
4688   effect(TEMP tmp, TEMP tmp2);
4689   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4690             "movd     $tmp2,$src1\n\t"
4691             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4692             "movd     $dst,$tmp2\t! add reduction2I" %}
4693   ins_encode %{
4694     int vector_len = 0;
4695     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4696     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4697     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4698     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4699   %}
4700   ins_pipe( pipe_slow );
4701 %}
4702 
4703 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4704   predicate(UseAVX > 2);
4705   match(Set dst (AddReductionVI src1 src2));
4706   effect(TEMP tmp, TEMP tmp2);
4707   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
4708             "vpaddd  $tmp,$src2,$tmp2\n\t"
4709             "movd    $tmp2,$src1\n\t"
4710             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4711             "movd    $dst,$tmp2\t! add reduction2I" %}
4712   ins_encode %{
4713     int vector_len = 0;
4714     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4715     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4716     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4717     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4718     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4719   %}
4720   ins_pipe( pipe_slow );
4721 %}
4722 
4723 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4724   predicate(UseSSE > 2 && UseAVX == 0);
4725   match(Set dst (AddReductionVI src1 src2));
4726   effect(TEMP tmp, TEMP tmp2);
4727   format %{ "movdqu  $tmp,$src2\n\t"
4728             "phaddd  $tmp,$tmp\n\t"
4729             "phaddd  $tmp,$tmp\n\t"
4730             "movd    $tmp2,$src1\n\t"
4731             "paddd   $tmp2,$tmp\n\t"
4732             "movd    $dst,$tmp2\t! add reduction4I" %}
4733   ins_encode %{
4734     __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
4735     __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
4736     __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
4737     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4738     __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
4739     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4740   %}
4741   ins_pipe( pipe_slow );
4742 %}
4743 
4744 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4745   predicate(VM_Version::supports_avxonly());
4746   match(Set dst (AddReductionVI src1 src2));
4747   effect(TEMP tmp, TEMP tmp2);
4748   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4749             "vphaddd  $tmp,$tmp,$tmp\n\t"
4750             "movd     $tmp2,$src1\n\t"
4751             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4752             "movd     $dst,$tmp2\t! add reduction4I" %}
4753   ins_encode %{
4754     int vector_len = 0;
4755     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4756     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
4757     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4758     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4759     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4760   %}
4761   ins_pipe( pipe_slow );
4762 %}
4763 
4764 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4765   predicate(UseAVX > 2);
4766   match(Set dst (AddReductionVI src1 src2));
4767   effect(TEMP tmp, TEMP tmp2);
4768   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4769             "vpaddd  $tmp,$src2,$tmp2\n\t"
4770             "pshufd  $tmp2,$tmp,0x1\n\t"
4771             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4772             "movd    $tmp2,$src1\n\t"
4773             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4774             "movd    $dst,$tmp2\t! add reduction4I" %}
4775   ins_encode %{
4776     int vector_len = 0;
4777     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4778     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4779     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4780     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4781     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4782     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4783     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4784   %}
4785   ins_pipe( pipe_slow );
4786 %}
4787 
4788 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4789   predicate(VM_Version::supports_avxonly());
4790   match(Set dst (AddReductionVI src1 src2));
4791   effect(TEMP tmp, TEMP tmp2);
4792   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4793             "vphaddd  $tmp,$tmp,$tmp2\n\t"
4794             "vextracti128  $tmp2,$tmp\n\t"
4795             "vpaddd   $tmp,$tmp,$tmp2\n\t"
4796             "movd     $tmp2,$src1\n\t"
4797             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4798             "movd     $dst,$tmp2\t! add reduction8I" %}
4799   ins_encode %{
4800     int vector_len = 1;
4801     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4802     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4803     __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
4804     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4805     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4806     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4807     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4808   %}
4809   ins_pipe( pipe_slow );
4810 %}
4811 
4812 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4813   predicate(UseAVX > 2);
4814   match(Set dst (AddReductionVI src1 src2));
4815   effect(TEMP tmp, TEMP tmp2);
4816   format %{ "vextracti128  $tmp,$src2\n\t"
4817             "vpaddd  $tmp,$tmp,$src2\n\t"
4818             "pshufd  $tmp2,$tmp,0xE\n\t"
4819             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4820             "pshufd  $tmp2,$tmp,0x1\n\t"
4821             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4822             "movd    $tmp2,$src1\n\t"
4823             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4824             "movd    $dst,$tmp2\t! add reduction8I" %}
4825   ins_encode %{
4826     int vector_len = 0;
4827     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
4828     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4829     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4830     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4831     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4832     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4833     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4834     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4835     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4836   %}
4837   ins_pipe( pipe_slow );
4838 %}
4839 
4840 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4841   predicate(UseAVX > 2);
4842   match(Set dst (AddReductionVI src1 src2));
4843   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4844   format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
4845             "vpaddd  $tmp3,$tmp3,$src2\n\t"
4846             "vextracti128   $tmp,$tmp3\n\t"
4847             "vpaddd  $tmp,$tmp,$tmp3\n\t"
4848             "pshufd  $tmp2,$tmp,0xE\n\t"
4849             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4850             "pshufd  $tmp2,$tmp,0x1\n\t"
4851             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4852             "movd    $tmp2,$src1\n\t"
4853             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4854             "movd    $dst,$tmp2\t! mul reduction16I" %}
4855   ins_encode %{
4856     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
4857     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
4858     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
4859     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
4860     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4861     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4862     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4863     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4864     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4865     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4866     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4867   %}
4868   ins_pipe( pipe_slow );
4869 %}
4870 
4871 #ifdef _LP64
4872 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
4873   predicate(UseAVX > 2);
4874   match(Set dst (AddReductionVL src1 src2));
4875   effect(TEMP tmp, TEMP tmp2);
4876   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4877             "vpaddq  $tmp,$src2,$tmp2\n\t"
4878             "movdq   $tmp2,$src1\n\t"
4879             "vpaddq  $tmp2,$tmp,$tmp2\n\t"
4880             "movdq   $dst,$tmp2\t! add reduction2L" %}
4881   ins_encode %{
4882     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4883     __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
4884     __ movdq($tmp2$$XMMRegister, $src1$$Register);
4885     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4886     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4887   %}
4888   ins_pipe( pipe_slow );
4889 %}
4890 
4891 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
4892   predicate(UseAVX > 2);
4893   match(Set dst (AddReductionVL src1 src2));
4894   effect(TEMP tmp, TEMP tmp2);
4895   format %{ "vextracti128  $tmp,$src2\n\t"
4896             "vpaddq  $tmp2,$tmp,$src2\n\t"
4897             "pshufd  $tmp,$tmp2,0xE\n\t"
4898             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4899             "movdq   $tmp,$src1\n\t"
4900             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4901             "movdq   $dst,$tmp2\t! add reduction4L" %}
4902   ins_encode %{
4903     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
4904     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
4905     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4906     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4907     __ movdq($tmp$$XMMRegister, $src1$$Register);
4908     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4909     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4910   %}
4911   ins_pipe( pipe_slow );
4912 %}
4913 
4914 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
4915   predicate(UseAVX > 2);
4916   match(Set dst (AddReductionVL src1 src2));
4917   effect(TEMP tmp, TEMP tmp2);
4918   format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
4919             "vpaddq  $tmp2,$tmp2,$src2\n\t"
4920             "vextracti128   $tmp,$tmp2\n\t"
4921             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4922             "pshufd  $tmp,$tmp2,0xE\n\t"
4923             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4924             "movdq   $tmp,$src1\n\t"
4925             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4926             "movdq   $dst,$tmp2\t! add reduction8L" %}
4927   ins_encode %{
4928     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
4929     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
4930     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
4931     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4932     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4933     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4934     __ movdq($tmp$$XMMRegister, $src1$$Register);
4935     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4936     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4937   %}
4938   ins_pipe( pipe_slow );
4939 %}
4940 #endif
4941 
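// Float and double add reductions accumulate into the scalar value already
// held in $dst with addss/addsd (vaddss/vaddsd for AVX).  Each remaining lane
// is brought down to element 0 with pshufd (after vextractf128/vextractf32x4
// for 256/512-bit sources) and added in ascending lane order, one at a time.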
4942 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
4943   predicate(UseSSE >= 1 && UseAVX == 0);
4944   match(Set dst (AddReductionVF dst src2));
4945   effect(TEMP dst, TEMP tmp);
4946   format %{ "addss   $dst,$src2\n\t"
4947             "pshufd  $tmp,$src2,0x01\n\t"
4948             "addss   $dst,$tmp\t! add reduction2F" %}
4949   ins_encode %{
4950     __ addss($dst$$XMMRegister, $src2$$XMMRegister);
4951     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4952     __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
4953   %}
4954   ins_pipe( pipe_slow );
4955 %}
4956 
4957 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
4958   predicate(UseAVX > 0);
4959   match(Set dst (AddReductionVF dst src2));
4960   effect(TEMP dst, TEMP tmp);
4961   format %{ "vaddss  $dst,$dst,$src2\n\t"
4962             "pshufd  $tmp,$src2,0x01\n\t"
4963             "vaddss  $dst,$dst,$tmp\t! add reduction2F" %}
4964   ins_encode %{
4965     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
4966     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4967     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
4968   %}
4969   ins_pipe( pipe_slow );
4970 %}
4971 
4972 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
4973   predicate(UseSSE >= 1 && UseAVX == 0);
4974   match(Set dst (AddReductionVF dst src2));
4975   effect(TEMP dst, TEMP tmp);
4976   format %{ "addss   $dst,$src2\n\t"
4977             "pshufd  $tmp,$src2,0x01\n\t"
4978             "addss   $dst,$tmp\n\t"
4979             "pshufd  $tmp,$src2,0x02\n\t"
4980             "addss   $dst,$tmp\n\t"
4981             "pshufd  $tmp,$src2,0x03\n\t"
4982             "addss   $dst,$tmp\t! add reduction4F" %}
4983   ins_encode %{
4984     __ addss($dst$$XMMRegister, $src2$$XMMRegister);
4985     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4986     __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
4987     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4988     __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
4989     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4990     __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
4991   %}
4992   ins_pipe( pipe_slow );
4993 %}
4994 
4995 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
4996   predicate(UseAVX > 0);
4997   match(Set dst (AddReductionVF dst src2));
4998   effect(TEMP tmp, TEMP dst);
4999   format %{ "vaddss  $dst,$dst,$src2\n\t"
5000             "pshufd  $tmp,$src2,0x01\n\t"
5001             "vaddss  $dst,$dst,$tmp\n\t"
5002             "pshufd  $tmp,$src2,0x02\n\t"
5003             "vaddss  $dst,$dst,$tmp\n\t"
5004             "pshufd  $tmp,$src2,0x03\n\t"
5005             "vaddss  $dst,$dst,$tmp\t! add reduction4F" %}
5006   ins_encode %{
5007     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5008     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5009     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5010     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5011     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5012     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5013     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5014   %}
5015   ins_pipe( pipe_slow );
5016 %}
5017 
5018 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
5019   predicate(UseAVX > 0);
5020   match(Set dst (AddReductionVF dst src2));
5021   effect(TEMP tmp, TEMP dst, TEMP tmp2);
5022   format %{ "vaddss  $dst,$dst,$src2\n\t"
5023             "pshufd  $tmp,$src2,0x01\n\t"
5024             "vaddss  $dst,$dst,$tmp\n\t"
5025             "pshufd  $tmp,$src2,0x02\n\t"
5026             "vaddss  $dst,$dst,$tmp\n\t"
5027             "pshufd  $tmp,$src2,0x03\n\t"
5028             "vaddss  $dst,$dst,$tmp\n\t"
5029             "vextractf128  $tmp2,$src2\n\t"
5030             "vaddss  $dst,$dst,$tmp2\n\t"
5031             "pshufd  $tmp,$tmp2,0x01\n\t"
5032             "vaddss  $dst,$dst,$tmp\n\t"
5033             "pshufd  $tmp,$tmp2,0x02\n\t"
5034             "vaddss  $dst,$dst,$tmp\n\t"
5035             "pshufd  $tmp,$tmp2,0x03\n\t"
5036             "vaddss  $dst,$dst,$tmp\t! add reduction8F" %}
5037   ins_encode %{
5038     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5039     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5040     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5041     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5042     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5043     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5044     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5045     __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
5046     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5047     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
5048     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5049     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
5050     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5051     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
5052     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5053   %}
5054   ins_pipe( pipe_slow );
5055 %}
5056 
5057 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
5058   predicate(UseAVX > 2);
5059   match(Set dst (AddReductionVF dst src2));
5060   effect(TEMP tmp, TEMP dst, TEMP tmp2);
5061   format %{ "vaddss  $dst,$dst,$src2\n\t"
5062             "pshufd  $tmp,$src2,0x01\n\t"
5063             "vaddss  $dst,$dst,$tmp\n\t"
5064             "pshufd  $tmp,$src2,0x02\n\t"
5065             "vaddss  $dst,$dst,$tmp\n\t"
5066             "pshufd  $tmp,$src2,0x03\n\t"
5067             "vaddss  $dst,$dst,$tmp\n\t"
5068             "vextractf32x4  $tmp2,$src2, 0x1\n\t"
5069             "vaddss  $dst,$dst,$tmp2\n\t"
5070             "pshufd  $tmp,$tmp2,0x01\n\t"
5071             "vaddss  $dst,$dst,$tmp\n\t"
5072             "pshufd  $tmp,$tmp2,0x02\n\t"
5073             "vaddss  $dst,$dst,$tmp\n\t"
5074             "pshufd  $tmp,$tmp2,0x03\n\t"
5075             "vaddss  $dst,$dst,$tmp\n\t"
5076             "vextractf32x4  $tmp2,$src2, 0x2\n\t"
5077             "vaddss  $dst,$dst,$tmp2\n\t"
5078             "pshufd  $tmp,$tmp2,0x01\n\t"
5079             "vaddss  $dst,$dst,$tmp\n\t"
5080             "pshufd  $tmp,$tmp2,0x02\n\t"
5081             "vaddss  $dst,$dst,$tmp\n\t"
5082             "pshufd  $tmp,$tmp2,0x03\n\t"
5083             "vaddss  $dst,$dst,$tmp\n\t"
5084             "vextractf32x4  $tmp2,$src2, 0x3\n\t"
5085             "vaddss  $dst,$dst,$tmp2\n\t"
5086             "pshufd  $tmp,$tmp2,0x01\n\t"
5087             "vaddss  $dst,$dst,$tmp\n\t"
5088             "pshufd  $tmp,$tmp2,0x02\n\t"
5089             "vaddss  $dst,$dst,$tmp\n\t"
5090             "pshufd  $tmp,$tmp2,0x03\n\t"
5091             "vaddss  $dst,$dst,$tmp\t! add reduction16F" %}
5092   ins_encode %{
5093     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5094     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5095     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5096     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5097     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5098     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5099     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5100     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5101     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5102     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
5103     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5104     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
5105     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5106     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
5107     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5108     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
5109     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5110     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
5111     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5112     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
5113     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5114     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
5115     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5116     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
5117     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5118     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
5119     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5120     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
5121     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5122     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
5123     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5124   %}
5125   ins_pipe( pipe_slow );
5126 %}
5127 
5128 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
5129   predicate(UseSSE >= 1 && UseAVX == 0);
5130   match(Set dst (AddReductionVD dst src2));
5131   effect(TEMP tmp, TEMP dst);
5132   format %{ "addsd   $dst,$src2\n\t"
5133             "pshufd  $tmp,$src2,0xE\n\t"
5134             "addsd   $dst,$tmp\t! add reduction2D" %}
5135   ins_encode %{
5136     __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
5137     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5138     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
5139   %}
5140   ins_pipe( pipe_slow );
5141 %}
5142 
5143 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
5144   predicate(UseAVX > 0);
5145   match(Set dst (AddReductionVD dst src2));
5146   effect(TEMP tmp, TEMP dst);
5147   format %{ "vaddsd  $dst,$dst,$src2\n\t"
5148             "pshufd  $tmp,$src2,0xE\n\t"
5149             "vaddsd  $dst,$dst,$tmp\t! add reduction2D" %}
5150   ins_encode %{
5151     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5152     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5153     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5154   %}
5155   ins_pipe( pipe_slow );
5156 %}
5157 
5158 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
5159   predicate(UseAVX > 0);
5160   match(Set dst (AddReductionVD dst src2));
5161   effect(TEMP tmp, TEMP dst, TEMP tmp2);
5162   format %{ "vaddsd  $dst,$dst,$src2\n\t"
5163             "pshufd  $tmp,$src2,0xE\n\t"
5164             "vaddsd  $dst,$dst,$tmp\n\t"
5165             "vextractf32x4h  $tmp2,$src2, 0x1\n\t"
5166             "vaddsd  $dst,$dst,$tmp2\n\t"
5167             "pshufd  $tmp,$tmp2,0xE\n\t"
5168             "vaddsd  $dst,$dst,$tmp\t! add reduction4D" %}
5169   ins_encode %{
5170     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5171     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5172     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5173     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5174     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5175     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5176     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5177   %}
5178   ins_pipe( pipe_slow );
5179 %}
5180 
5181 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
5182   predicate(UseAVX > 2);
5183   match(Set dst (AddReductionVD dst src2));
5184   effect(TEMP tmp, TEMP dst, TEMP tmp2);
5185   format %{ "vaddsd  $dst,$dst,$src2\n\t"
5186             "pshufd  $tmp,$src2,0xE\n\t"
5187             "vaddsd  $dst,$dst,$tmp\n\t"
5188             "vextractf32x4  $tmp2,$src2, 0x1\n\t"
5189             "vaddsd  $dst,$dst,$tmp2\n\t"
5190             "pshufd  $tmp,$tmp2,0xE\n\t"
5191             "vaddsd  $dst,$dst,$tmp\n\t"
5192             "vextractf32x4  $tmp2,$src2, 0x2\n\t"
5193             "vaddsd  $dst,$dst,$tmp2\n\t"
5194             "pshufd  $tmp,$tmp2,0xE\n\t"
5195             "vaddsd  $dst,$dst,$tmp\n\t"
5196             "vextractf32x4  $tmp2,$src2, 0x3\n\t"
5197             "vaddsd  $dst,$dst,$tmp2\n\t"
5198             "pshufd  $tmp,$tmp2,0xE\n\t"
5199             "vaddsd  $dst,$dst,$tmp\t! add reduction8D" %}
5200   ins_encode %{
5201     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5202     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5203     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5204     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5205     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5206     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5207     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5208     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
5209     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5210     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5211     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5212     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
5213     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5214     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5215     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5216   %}
5217   ins_pipe( pipe_slow );
5218 %}
5219 
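// Integer multiply reductions follow the same lane-folding scheme as the add
// reductions above, using pmulld/vpmulld; the scalar SSE variants therefore
// require SSE4.1 (UseSSE > 3).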
5220 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
5221   predicate(UseSSE > 3 && UseAVX == 0);
5222   match(Set dst (MulReductionVI src1 src2));
5223   effect(TEMP tmp, TEMP tmp2);
5224   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
5225             "pmulld  $tmp2,$src2\n\t"
5226             "movd    $tmp,$src1\n\t"
5227             "pmulld  $tmp2,$tmp\n\t"
5228             "movd    $dst,$tmp2\t! mul reduction2I" %}
5229   ins_encode %{
5230     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5231     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5232     __ movdl($tmp$$XMMRegister, $src1$$Register);
5233     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5234     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5235   %}
5236   ins_pipe( pipe_slow );
5237 %}
5238 
5239 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
5240   predicate(UseAVX > 0);
5241   match(Set dst (MulReductionVI src1 src2));
5242   effect(TEMP tmp, TEMP tmp2);
5243   format %{ "pshufd   $tmp2,$src2,0x1\n\t"
5244             "vpmulld  $tmp,$src2,$tmp2\n\t"
5245             "movd     $tmp2,$src1\n\t"
5246             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
5247             "movd     $dst,$tmp2\t! mul reduction2I" %}
5248   ins_encode %{
5249     int vector_len = 0;
5250     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5251     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5252     __ movdl($tmp2$$XMMRegister, $src1$$Register);
5253     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5254     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5255   %}
5256   ins_pipe( pipe_slow );
5257 %}
5258 
5259 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
5260   predicate(UseSSE > 3 && UseAVX == 0);
5261   match(Set dst (MulReductionVI src1 src2));
5262   effect(TEMP tmp, TEMP tmp2);
5263   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
5264             "pmulld  $tmp2,$src2\n\t"
5265             "pshufd  $tmp,$tmp2,0x1\n\t"
5266             "pmulld  $tmp2,$tmp\n\t"
5267             "movd    $tmp,$src1\n\t"
5268             "pmulld  $tmp2,$tmp\n\t"
5269             "movd    $dst,$tmp2\t! mul reduction4I" %}
5270   ins_encode %{
5271     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5272     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
5273     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
5274     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5275     __ movdl($tmp$$XMMRegister, $src1$$Register);
5276     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
5277     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5278   %}
5279   ins_pipe( pipe_slow );
5280 %}
5281 
5282 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
5283   predicate(UseAVX > 0);
5284   match(Set dst (MulReductionVI src1 src2));
5285   effect(TEMP tmp, TEMP tmp2);
5286   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
5287             "vpmulld  $tmp,$src2,$tmp2\n\t"
5288             "pshufd   $tmp2,$tmp,0x1\n\t"
5289             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5290             "movd     $tmp2,$src1\n\t"
5291             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
5292             "movd     $dst,$tmp2\t! mul reduction4I" %}
5293   ins_encode %{
5294     int vector_len = 0;
5295     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5296     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5297     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5298     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5299     __ movdl($tmp2$$XMMRegister, $src1$$Register);
5300     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5301     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5302   %}
5303   ins_pipe( pipe_slow );
5304 %}
5305 
5306 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
5307   predicate(UseAVX > 0);
5308   match(Set dst (MulReductionVI src1 src2));
5309   effect(TEMP tmp, TEMP tmp2);
5310   format %{ "vextracti128  $tmp,$src2\n\t"
5311             "vpmulld  $tmp,$tmp,$src2\n\t"
5312             "pshufd   $tmp2,$tmp,0xE\n\t"
5313             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5314             "pshufd   $tmp2,$tmp,0x1\n\t"
5315             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5316             "movd     $tmp2,$src1\n\t"
5317             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
5318             "movd     $dst,$tmp2\t! mul reduction8I" %}
5319   ins_encode %{
5320     int vector_len = 0;
5321     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
5322     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
5323     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
5324     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5325     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5326     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5327     __ movdl($tmp2$$XMMRegister, $src1$$Register);
5328     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
5329     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5330   %}
5331   ins_pipe( pipe_slow );
5332 %}
5333 
5334 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
5335   predicate(UseAVX > 2);
5336   match(Set dst (MulReductionVI src1 src2));
5337   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5338   format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
5339             "vpmulld  $tmp3,$tmp3,$src2\n\t"
5340             "vextracti128   $tmp,$tmp3\n\t"
5341             "vpmulld  $tmp,$tmp,$tmp3\n\t"
5342             "pshufd   $tmp2,$tmp,0xE\n\t"
5343             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5344             "pshufd   $tmp2,$tmp,0x1\n\t"
5345             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5346             "movd     $tmp2,$src1\n\t"
5347             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
5348             "movd     $dst,$tmp2\t! mul reduction16I" %}
5349   ins_encode %{
5350     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
5351     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
5352     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
5353     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
5354     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
5355     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5356     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5357     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5358     __ movdl($tmp2$$XMMRegister, $src1$$Register);
5359     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5360     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5361   %}
5362   ins_pipe( pipe_slow );
5363 %}
5364 
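// Long multiply reductions additionally require AVX-512DQ, since the vpmullq
// instruction used below is only available with that extension (see the
// supports_avx512dq() predicates).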
5365 #ifdef _LP64
5366 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
5367   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
5368   match(Set dst (MulReductionVL src1 src2));
5369   effect(TEMP tmp, TEMP tmp2);
5370   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
5371             "vpmullq  $tmp,$src2,$tmp2\n\t"
5372             "movdq    $tmp2,$src1\n\t"
5373             "vpmullq  $tmp2,$tmp,$tmp2\n\t"
5374             "movdq    $dst,$tmp2\t! mul reduction2L" %}
5375   ins_encode %{
5376     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5377     __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
5378     __ movdq($tmp2$$XMMRegister, $src1$$Register);
5379     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5380     __ movdq($dst$$Register, $tmp2$$XMMRegister);
5381   %}
5382   ins_pipe( pipe_slow );
5383 %}
5384 
5385 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
5386   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
5387   match(Set dst (MulReductionVL src1 src2));
5388   effect(TEMP tmp, TEMP tmp2);
5389   format %{ "vextracti128  $tmp,$src2\n\t"
5390             "vpmullq  $tmp2,$tmp,$src2\n\t"
5391             "pshufd   $tmp,$tmp2,0xE\n\t"
5392             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5393             "movdq    $tmp,$src1\n\t"
5394             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5395             "movdq    $dst,$tmp2\t! mul reduction4L" %}
5396   ins_encode %{
5397     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
5398     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
5399     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5400     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5401     __ movdq($tmp$$XMMRegister, $src1$$Register);
5402     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5403     __ movdq($dst$$Register, $tmp2$$XMMRegister);
5404   %}
5405   ins_pipe( pipe_slow );
5406 %}
5407 
5408 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
5409   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
5410   match(Set dst (MulReductionVL src1 src2));
5411   effect(TEMP tmp, TEMP tmp2);
5412   format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
5413             "vpmullq  $tmp2,$tmp2,$src2\n\t"
5414             "vextracti128   $tmp,$tmp2\n\t"
5415             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5416             "pshufd   $tmp,$tmp2,0xE\n\t"
5417             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5418             "movdq    $tmp,$src1\n\t"
5419             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5420             "movdq    $dst,$tmp2\t! mul reduction8L" %}
5421   ins_encode %{
5422     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
5423     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
5424     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
5425     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5426     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5427     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5428     __ movdq($tmp$$XMMRegister, $src1$$Register);
5429     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5430     __ movdq($dst$$Register, $tmp2$$XMMRegister);
5431   %}
5432   ins_pipe( pipe_slow );
5433 %}
5434 #endif
5435 
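// Float and double multiply reductions mirror the corresponding add
// reductions, with mulss/mulsd (vmulss/vmulsd for AVX) as the combining
// operation.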
5436 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
5437   predicate(UseSSE >= 1 && UseAVX == 0);
5438   match(Set dst (MulReductionVF dst src2));
5439   effect(TEMP dst, TEMP tmp);
5440   format %{ "mulss   $dst,$src2\n\t"
5441             "pshufd  $tmp,$src2,0x01\n\t"
5442             "mulss   $dst,$tmp\t! mul reduction2F" %}
5443   ins_encode %{
5444     __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5445     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5446     __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5447   %}
5448   ins_pipe( pipe_slow );
5449 %}
5450 
5451 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
5452   predicate(UseAVX > 0);
5453   match(Set dst (MulReductionVF dst src2));
5454   effect(TEMP tmp, TEMP dst);
5455   format %{ "vmulss  $dst,$dst,$src2\n\t"
5456             "pshufd  $tmp,$src2,0x01\n\t"
5457             "vmulss  $dst,$dst,$tmp\t! mul reduction2F" %}
5458   ins_encode %{
5459     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5460     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5461     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5462   %}
5463   ins_pipe( pipe_slow );
5464 %}
5465 
5466 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
5467   predicate(UseSSE >= 1 && UseAVX == 0);
5468   match(Set dst (MulReductionVF dst src2));
5469   effect(TEMP dst, TEMP tmp);
5470   format %{ "mulss   $dst,$src2\n\t"
5471             "pshufd  $tmp,$src2,0x01\n\t"
5472             "mulss   $dst,$tmp\n\t"
5473             "pshufd  $tmp,$src2,0x02\n\t"
5474             "mulss   $dst,$tmp\n\t"
5475             "pshufd  $tmp,$src2,0x03\n\t"
5476             "mulss   $dst,$tmp\t! mul reduction4F" %}
5477   ins_encode %{
5478     __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
5479     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5480     __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5481     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5482     __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5483     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5484     __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
5485   %}
5486   ins_pipe( pipe_slow );
5487 %}
5488 
5489 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
5490   predicate(UseAVX > 0);
5491   match(Set dst (MulReductionVF dst src2));
5492   effect(TEMP tmp, TEMP dst);
5493   format %{ "vmulss  $dst,$dst,$src2\n\t"
5494             "pshufd  $tmp,$src2,0x01\n\t"
5495             "vmulss  $dst,$dst,$tmp\n\t"
5496             "pshufd  $tmp,$src2,0x02\n\t"
5497             "vmulss  $dst,$dst,$tmp\n\t"
5498             "pshufd  $tmp,$src2,0x03\n\t"
5499             "vmulss  $dst,$dst,$tmp\t! mul reduction4F" %}
5500   ins_encode %{
5501     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5502     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5503     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5504     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5505     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5506     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5507     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5508   %}
5509   ins_pipe( pipe_slow );
5510 %}
5511 
5512 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
5513   predicate(UseAVX > 0);
5514   match(Set dst (MulReductionVF dst src2));
5515   effect(TEMP tmp, TEMP dst, TEMP tmp2);
5516   format %{ "vmulss  $dst,$dst,$src2\n\t"
5517             "pshufd  $tmp,$src2,0x01\n\t"
5518             "vmulss  $dst,$dst,$tmp\n\t"
5519             "pshufd  $tmp,$src2,0x02\n\t"
5520             "vmulss  $dst,$dst,$tmp\n\t"
5521             "pshufd  $tmp,$src2,0x03\n\t"
5522             "vmulss  $dst,$dst,$tmp\n\t"
5523             "vextractf128  $tmp2,$src2\n\t"
5524             "vmulss  $dst,$dst,$tmp2\n\t"
5525             "pshufd  $tmp,$tmp2,0x01\n\t"
5526             "vmulss  $dst,$dst,$tmp\n\t"
5527             "pshufd  $tmp,$tmp2,0x02\n\t"
5528             "vmulss  $dst,$dst,$tmp\n\t"
5529             "pshufd  $tmp,$tmp2,0x03\n\t"
5530             "vmulss  $dst,$dst,$tmp\t! mul reduction8F" %}
5531   ins_encode %{
5532     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5533     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5534     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5535     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5536     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5537     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5538     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5539     __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
5540     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5541     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
5542     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5543     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
5544     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5545     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
5546     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5547   %}
5548   ins_pipe( pipe_slow );
5549 %}
5550 
5551 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
5552   predicate(UseAVX > 2);
5553   match(Set dst (MulReductionVF dst src2));
5554   effect(TEMP tmp, TEMP dst, TEMP tmp2);
5555   format %{ "vmulss  $dst,$dst,$src2\n\t"
5556             "pshufd  $tmp,$src2,0x01\n\t"
5557             "vmulss  $dst,$dst,$tmp\n\t"
5558             "pshufd  $tmp,$src2,0x02\n\t"
5559             "vmulss  $dst,$dst,$tmp\n\t"
5560             "pshufd  $tmp,$src2,0x03\n\t"
5561             "vmulss  $dst,$dst,$tmp\n\t"
5562             "vextractf32x4  $tmp2,$src2, 0x1\n\t"
5563             "vmulss  $dst,$dst,$tmp2\n\t"
5564             "pshufd  $tmp,$tmp2,0x01\n\t"
5565             "vmulss  $dst,$dst,$tmp\n\t"
5566             "pshufd  $tmp,$tmp2,0x02\n\t"
5567             "vmulss  $dst,$dst,$tmp\n\t"
5568             "pshufd  $tmp,$tmp2,0x03\n\t"
5569             "vmulss  $dst,$dst,$tmp\n\t"
5570             "vextractf32x4  $tmp2,$src2, 0x2\n\t"
5571             "vmulss  $dst,$dst,$tmp2\n\t"
5572             "pshufd  $tmp,$tmp2,0x01\n\t"
5573             "vmulss  $dst,$dst,$tmp\n\t"
5574             "pshufd  $tmp,$tmp2,0x02\n\t"
5575             "vmulss  $dst,$dst,$tmp\n\t"
5576             "pshufd  $tmp,$tmp2,0x03\n\t"
5577             "vmulss  $dst,$dst,$tmp\n\t"
5578             "vextractf32x4  $tmp2,$src2, 0x3\n\t"
5579             "vmulss  $dst,$dst,$tmp2\n\t"
5580             "pshufd  $tmp,$tmp2,0x01\n\t"
5581             "vmulss  $dst,$dst,$tmp\n\t"
5582             "pshufd  $tmp,$tmp2,0x02\n\t"
5583             "vmulss  $dst,$dst,$tmp\n\t"
5584             "pshufd  $tmp,$tmp2,0x03\n\t"
5585             "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
5586   ins_encode %{
5587     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5588     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5589     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5590     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5591     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5592     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5593     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5594     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5595     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5596     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
5597     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5598     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
5599     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5600     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
5601     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5602     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
5603     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5604     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
5605     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5606     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
5607     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5608     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
5609     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5610     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
5611     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5612     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
5613     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5614     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
5615     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5616     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
5617     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5618   %}
5619   ins_pipe( pipe_slow );
5620 %}
5621 
5622 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
5623   predicate(UseSSE >= 1 && UseAVX == 0);
5624   match(Set dst (MulReductionVD dst src2));
5625   effect(TEMP dst, TEMP tmp);
5626   format %{ "mulsd   $dst,$src2\n\t"
5627             "pshufd  $tmp,$src2,0xE\n\t"
5628             "mulsd   $dst,$tmp\t! mul reduction2D" %}
5629   ins_encode %{
5630     __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
5631     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5632     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
5633   %}
5634   ins_pipe( pipe_slow );
5635 %}
5636 
5637 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
5638   predicate(UseAVX > 0);
5639   match(Set dst (MulReductionVD dst src2));
5640   effect(TEMP tmp, TEMP dst);
5641   format %{ "vmulsd  $dst,$dst,$src2\n\t"
5642             "pshufd  $tmp,$src2,0xE\n\t"
5643             "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
5644   ins_encode %{
5645     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5646     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5647     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5648   %}
5649   ins_pipe( pipe_slow );
5650 %}
5651 
5652 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
5653   predicate(UseAVX > 0);
5654   match(Set dst (MulReductionVD dst src2));
5655   effect(TEMP tmp, TEMP dst, TEMP tmp2);
5656   format %{ "vmulsd  $dst,$dst,$src2\n\t"
5657             "pshufd  $tmp,$src2,0xE\n\t"
5658             "vmulsd  $dst,$dst,$tmp\n\t"
5659             "vextractf128  $tmp2,$src2\n\t"
5660             "vmulsd  $dst,$dst,$tmp2\n\t"
5661             "pshufd  $tmp,$tmp2,0xE\n\t"
5662             "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
5663   ins_encode %{
5664     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5665     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5666     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5667     __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
5668     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5669     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5670     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5671   %}
5672   ins_pipe( pipe_slow );
5673 %}
5674 
5675 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
5676   predicate(UseAVX > 2);
5677   match(Set dst (MulReductionVD dst src2));
5678   effect(TEMP tmp, TEMP dst, TEMP tmp2);
5679   format %{ "vmulsd  $dst,$dst,$src2\n\t"
5680             "pshufd  $tmp,$src2,0xE\n\t"
5681             "vmulsd  $dst,$dst,$tmp\n\t"
5682             "vextractf32x4  $tmp2,$src2, 0x1\n\t"
5683             "vmulsd  $dst,$dst,$tmp2\n\t"
5684             "pshufd  $tmp,$tmp2,0xE\n\t"
5685             "vmulsd  $dst,$dst,$tmp\n\t"
5686             "vextractf32x4  $tmp2,$src2, 0x2\n\t"
5687             "vmulsd  $dst,$dst,$tmp2\n\t"
5688             "pshufd  $tmp,$tmp2,0xE\n\t"
5689             "vmulsd  $dst,$dst,$tmp\n\t"
5690             "vextractf32x4  $tmp2,$src2, 0x3\n\t"
5691             "vmulsd  $dst,$dst,$tmp2\n\t"
5692             "pshufd  $tmp,$tmp2,0xE\n\t"
5693             "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
5694   ins_encode %{
5695     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
5696     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5697     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5698     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
5699     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5700     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5701     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5702     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
5703     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5704     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5705     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5706     __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
5707     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
5708     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5709     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
5710   %}
5711   ins_pipe( pipe_slow );
5712 %}
5713 
5714 // ====================VECTOR ARITHMETIC=======================================
5715 
5716 // --------------------------------- ADD --------------------------------------
5717 
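// Each packed add below comes in several flavors: a destructive two-operand
// SSE rule (dst += src), three-operand _reg_avx/_reg_evex rules, _mem rules
// that fold a LoadVector into the memory operand, and _evex_special rules
// that keep the destructive two-operand match form for certain EVEX
// configurations.  The vector_len argument selects the operand width:
// 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.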
5718 // Bytes vector add
5719 instruct vadd4B(vecS dst, vecS src) %{
5720   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
5721   match(Set dst (AddVB dst src));
5722   format %{ "paddb   $dst,$src\t! add packed4B" %}
5723   ins_encode %{
5724     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5725   %}
5726   ins_pipe( pipe_slow );
5727 %}
5728 
5729 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
5730   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
5731   match(Set dst (AddVB src1 src2));
5732   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
5733   ins_encode %{
5734     int vector_len = 0;
5735     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5736   %}
5737   ins_pipe( pipe_slow );
5738 %}
5739 
5740 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
5741   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
5742   match(Set dst (AddVB src1 src2));
5743   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
5744   ins_encode %{
5745     int vector_len = 0;
5746     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5747   %}
5748   ins_pipe( pipe_slow );
5749 %}
5750 
5751 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
5752   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
5753   match(Set dst (AddVB dst src2));
5754   effect(TEMP src1);
5755   format %{ "vpaddb  $dst,$dst,$src2\t! add packed4B" %}
5756   ins_encode %{
5757     int vector_len = 0;
5758     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5759   %}
5760   ins_pipe( pipe_slow );
5761 %}
5762 
5763 instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
5764   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
5765   match(Set dst (AddVB src (LoadVector mem)));
5766   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
5767   ins_encode %{
5768     int vector_len = 0;
5769     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5770   %}
5771   ins_pipe( pipe_slow );
5772 %}
5773 
5774 instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
5775   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
5776   match(Set dst (AddVB src (LoadVector mem)));
5777   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
5778   ins_encode %{
5779     int vector_len = 0;
5780     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5781   %}
5782   ins_pipe( pipe_slow );
5783 %}
5784 
5785 instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
5786   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
5787   match(Set dst (AddVB dst (LoadVector mem)));
5788   effect(TEMP src);
5789   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
5790   ins_encode %{
5791     int vector_len = 0;
5792     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5793   %}
5794   ins_pipe( pipe_slow );
5795 %}
5796 
5797 instruct vadd8B(vecD dst, vecD src) %{
5798   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
5799   match(Set dst (AddVB dst src));
5800   format %{ "paddb   $dst,$src\t! add packed8B" %}
5801   ins_encode %{
5802     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5803   %}
5804   ins_pipe( pipe_slow );
5805 %}
5806 
5807 instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
5808   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
5809   match(Set dst (AddVB src1 src2));
5810   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
5811   ins_encode %{
5812     int vector_len = 0;
5813     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5814   %}
5815   ins_pipe( pipe_slow );
5816 %}
5817 
5818 instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
5819   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
5820   match(Set dst (AddVB src1 src2));
5821   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
5822   ins_encode %{
5823     int vector_len = 0;
5824     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5825   %}
5826   ins_pipe( pipe_slow );
5827 %}
5828 
5829 instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
5830   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
5831   match(Set dst (AddVB dst src2));
5832   effect(TEMP src1);
5833   format %{ "vpaddb  $dst,$dst,$src2\t! add packed8B" %}
5834   ins_encode %{
5835     int vector_len = 0;
5836     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5837   %}
5838   ins_pipe( pipe_slow );
5839 %}
5840 
5841 instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
5842   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
5843   match(Set dst (AddVB src (LoadVector mem)));
5844   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
5845   ins_encode %{
5846     int vector_len = 0;
5847     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5848   %}
5849   ins_pipe( pipe_slow );
5850 %}
5851 
5852 instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
5853   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
5854   match(Set dst (AddVB src (LoadVector mem)));
5855   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
5856   ins_encode %{
5857     int vector_len = 0;
5858     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5859   %}
5860   ins_pipe( pipe_slow );
5861 %}
5862 
5863 instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
5864   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
5865   match(Set dst (AddVB dst (LoadVector mem)));
5866   effect(TEMP src);
5867   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
5868   ins_encode %{
5869     int vector_len = 0;
5870     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5871   %}
5872   ins_pipe( pipe_slow );
5873 %}
5874 
5875 instruct vadd16B(vecX dst, vecX src) %{
5876   predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
5877   match(Set dst (AddVB dst src));
5878   format %{ "paddb   $dst,$src\t! add packed16B" %}
5879   ins_encode %{
5880     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5881   %}
5882   ins_pipe( pipe_slow );
5883 %}
5884 
5885 instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
5886   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
5887   match(Set dst (AddVB src1 src2));
5888   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
5889   ins_encode %{
5890     int vector_len = 0;
5891     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5892   %}
5893   ins_pipe( pipe_slow );
5894 %}
5895 
5896 instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
5897   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
5898   match(Set dst (AddVB src1 src2));
5899   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
5900   ins_encode %{
5901     int vector_len = 0;
5902     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5903   %}
5904   ins_pipe( pipe_slow );
5905 %}
5906 
5907 instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
5908   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
5909   match(Set dst (AddVB dst src2));
5910   effect(TEMP src1);
5911   format %{ "vpaddb  $dst,$dst,$src2\t! add packed16B" %}
5912   ins_encode %{
5913     int vector_len = 0;
5914     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5915   %}
5916   ins_pipe( pipe_slow );
5917 %}
5918 
5919 instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
5920   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
5921   match(Set dst (AddVB src (LoadVector mem)));
5922   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
5923   ins_encode %{
5924     int vector_len = 0;
5925     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5926   %}
5927   ins_pipe( pipe_slow );
5928 %}
5929 
5930 instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
5931   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
5932   match(Set dst (AddVB src (LoadVector mem)));
5933   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
5934   ins_encode %{
5935     int vector_len = 0;
5936     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5937   %}
5938   ins_pipe( pipe_slow );
5939 %}
5940 
5941 instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
5942   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
5943   match(Set dst (AddVB dst (LoadVector mem)));
5944   effect(TEMP src);
5945   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
5946   ins_encode %{
5947     int vector_len = 0;
5948     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5949   %}
5950   ins_pipe( pipe_slow );
5951 %}
5952 
5953 instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
5954   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
5955   match(Set dst (AddVB src1 src2));
5956   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
5957   ins_encode %{
5958     int vector_len = 1;
5959     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5960   %}
5961   ins_pipe( pipe_slow );
5962 %}
5963 
5964 instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
5965   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
5966   match(Set dst (AddVB src1 src2));
5967   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
5968   ins_encode %{
5969     int vector_len = 1;
5970     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5971   %}
5972   ins_pipe( pipe_slow );
5973 %}
5974 
5975 instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
5976   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
5977   match(Set dst (AddVB dst src2));
5978   effect(TEMP src1);
5979   format %{ "vpaddb  $dst,$dst,$src2\t! add packed32B" %}
5980   ins_encode %{
5981     int vector_len = 1;
5982     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5983   %}
5984   ins_pipe( pipe_slow );
5985 %}
5986 
5987 instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
5988   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
5989   match(Set dst (AddVB src (LoadVector mem)));
5990   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
5991   ins_encode %{
5992     int vector_len = 1;
5993     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5994   %}
5995   ins_pipe( pipe_slow );
5996 %}
5997 
5998 instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
5999   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
6000   match(Set dst (AddVB src (LoadVector mem)));
6001   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
6002   ins_encode %{
6003     int vector_len = 1;
6004     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6005   %}
6006   ins_pipe( pipe_slow );
6007 %}
6008 
6009 instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
6010   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
6011   match(Set dst (AddVB dst (LoadVector mem)));
6012   effect(TEMP src);
6013   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
6014   ins_encode %{
6015     int vector_len = 1;
6016     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6017   %}
6018   ins_pipe( pipe_slow );
6019 %}
6020 
6021 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
6022   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
6023   match(Set dst (AddVB src1 src2));
6024   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
6025   ins_encode %{
6026     int vector_len = 2;
6027     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6028   %}
6029   ins_pipe( pipe_slow );
6030 %}
6031 
6032 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
6033   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
6034   match(Set dst (AddVB src (LoadVector mem)));
6035   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
6036   ins_encode %{
6037     int vector_len = 2;
6038     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6039   %}
6040   ins_pipe( pipe_slow );
6041 %}
6042 
6043 // Shorts/Chars vector add
6044 instruct vadd2S(vecS dst, vecS src) %{
6045   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
6046   match(Set dst (AddVS dst src));
6047   format %{ "paddw   $dst,$src\t! add packed2S" %}
6048   ins_encode %{
6049     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
6050   %}
6051   ins_pipe( pipe_slow );
6052 %}
6053 
6054 instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
6055   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
6056   match(Set dst (AddVS src1 src2));
6057   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
6058   ins_encode %{
6059     int vector_len = 0;
6060     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6061   %}
6062   ins_pipe( pipe_slow );
6063 %}
6064 
6065 instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
6066   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
6067   match(Set dst (AddVS src1 src2));
6068   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
6069   ins_encode %{
6070     int vector_len = 0;
6071     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6072   %}
6073   ins_pipe( pipe_slow );
6074 %}
6075 
6076 instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
6077   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
6078   match(Set dst (AddVS dst src2));
6079   effect(TEMP src1);
6080   format %{ "vpaddw  $dst,$dst,$src2\t! add packed2S" %}
6081   ins_encode %{
6082     int vector_len = 0;
6083     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6084   %}
6085   ins_pipe( pipe_slow );
6086 %}
6087 
6088 instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
6089   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
6090   match(Set dst (AddVS src (LoadVector mem)));
6091   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
6092   ins_encode %{
6093     int vector_len = 0;
6094     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6095   %}
6096   ins_pipe( pipe_slow );
6097 %}
6098 
6099 instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
6100   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
6101   match(Set dst (AddVS src (LoadVector mem)));
6102   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
6103   ins_encode %{
6104     int vector_len = 0;
6105     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6106   %}
6107   ins_pipe( pipe_slow );
6108 %}
6109 
6110 instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
6111   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
6112   match(Set dst (AddVS dst (LoadVector mem)));
6113   effect(TEMP src);
6114   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
6115   ins_encode %{
6116     int vector_len = 0;
6117     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6118   %}
6119   ins_pipe( pipe_slow );
6120 %}
6121 
6122 instruct vadd4S(vecD dst, vecD src) %{
6123   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
6124   match(Set dst (AddVS dst src));
6125   format %{ "paddw   $dst,$src\t! add packed4S" %}
6126   ins_encode %{
6127     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
6128   %}
6129   ins_pipe( pipe_slow );
6130 %}
6131 
6132 instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
6133   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
6134   match(Set dst (AddVS src1 src2));
6135   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
6136   ins_encode %{
6137     int vector_len = 0;
6138     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6139   %}
6140   ins_pipe( pipe_slow );
6141 %}
6142 
6143 instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
6144   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
6145   match(Set dst (AddVS src1 src2));
6146   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
6147   ins_encode %{
6148     int vector_len = 0;
6149     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6150   %}
6151   ins_pipe( pipe_slow );
6152 %}
6153 
6154 instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
6155   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
6156   match(Set dst (AddVS dst src2));
6157   effect(TEMP src1);
6158   format %{ "vpaddw  $dst,$dst,$src2\t! add packed4S" %}
6159   ins_encode %{
6160     int vector_len = 0;
6161     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6162   %}
6163   ins_pipe( pipe_slow );
6164 %}
6165 
6166 instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
6167   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
6168   match(Set dst (AddVS src (LoadVector mem)));
6169   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
6170   ins_encode %{
6171     int vector_len = 0;
6172     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6173   %}
6174   ins_pipe( pipe_slow );
6175 %}
6176 
6177 instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
6178   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
6179   match(Set dst (AddVS src (LoadVector mem)));
6180   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
6181   ins_encode %{
6182     int vector_len = 0;
6183     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6184   %}
6185   ins_pipe( pipe_slow );
6186 %}
6187 
6188 instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
6189   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
6190   match(Set dst (AddVS dst (LoadVector mem)));
6191   effect(TEMP src);
6192   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
6193   ins_encode %{
6194     int vector_len = 0;
6195     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6196   %}
6197   ins_pipe( pipe_slow );
6198 %}
6199 
6200 instruct vadd8S(vecX dst, vecX src) %{
6201   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
6202   match(Set dst (AddVS dst src));
6203   format %{ "paddw   $dst,$src\t! add packed8S" %}
6204   ins_encode %{
6205     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
6206   %}
6207   ins_pipe( pipe_slow );
6208 %}
6209 
6210 instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
6211   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
6212   match(Set dst (AddVS src1 src2));
6213   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
6214   ins_encode %{
6215     int vector_len = 0;
6216     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6217   %}
6218   ins_pipe( pipe_slow );
6219 %}
6220 
6221 instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
6222   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
6223   match(Set dst (AddVS src1 src2));
6224   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
6225   ins_encode %{
6226     int vector_len = 0;
6227     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6228   %}
6229   ins_pipe( pipe_slow );
6230 %}
6231 
6232 instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
6233   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
6234   match(Set dst (AddVS dst src2));
6235   effect(TEMP src1);
6236   format %{ "vpaddw  $dst,$dst,$src2\t! add packed8S" %}
6237   ins_encode %{
6238     int vector_len = 0;
6239     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6240   %}
6241   ins_pipe( pipe_slow );
6242 %}
6243 
6244 instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
6245   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
6246   match(Set dst (AddVS src (LoadVector mem)));
6247   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
6248   ins_encode %{
6249     int vector_len = 0;
6250     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6251   %}
6252   ins_pipe( pipe_slow );
6253 %}
6254 
6255 instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
6256   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
6257   match(Set dst (AddVS src (LoadVector mem)));
6258   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
6259   ins_encode %{
6260     int vector_len = 0;
6261     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6262   %}
6263   ins_pipe( pipe_slow );
6264 %}
6265 
6266 instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
6267   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
6268   match(Set dst (AddVS dst (LoadVector mem)));
6269   effect(TEMP src);
6270   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
6271   ins_encode %{
6272     int vector_len = 0;
6273     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6274   %}
6275   ins_pipe( pipe_slow );
6276 %}
6277 
6278 instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
6279   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
6280   match(Set dst (AddVS src1 src2));
6281   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
6282   ins_encode %{
6283     int vector_len = 1;
6284     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6285   %}
6286   ins_pipe( pipe_slow );
6287 %}
6288 
6289 instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
6290   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
6291   match(Set dst (AddVS src1 src2));
6292   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
6293   ins_encode %{
6294     int vector_len = 1;
6295     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6296   %}
6297   ins_pipe( pipe_slow );
6298 %}
6299 
6300 instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
6301   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
6302   match(Set dst (AddVS dst src2));
6303   effect(TEMP src1);
6304   format %{ "vpaddw  $dst,$dst,$src2\t! add packed16S" %}
6305   ins_encode %{
6306     int vector_len = 1;
6307     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6308   %}
6309   ins_pipe( pipe_slow );
6310 %}
6311 
6312 instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
6313   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
6314   match(Set dst (AddVS src (LoadVector mem)));
6315   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
6316   ins_encode %{
6317     int vector_len = 1;
6318     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6319   %}
6320   ins_pipe( pipe_slow );
6321 %}
6322 
6323 instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
6324   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
6325   match(Set dst (AddVS src (LoadVector mem)));
6326   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
6327   ins_encode %{
6328     int vector_len = 1;
6329     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6330   %}
6331   ins_pipe( pipe_slow );
6332 %}
6333 
6334 instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
6335   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
6336   match(Set dst (AddVS dst (LoadVector mem)));
6337   effect(TEMP src);
6338   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
6339   ins_encode %{
6340     int vector_len = 1;
6341     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6342   %}
6343   ins_pipe( pipe_slow );
6344 %}
6345 
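// 512-bit short adds require AVX512BW (vpaddw on ZMM registers is a BW
// instruction), so there is no "_special" fallback at this width.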
6346 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
6347   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
6348   match(Set dst (AddVS src1 src2));
6349   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
6350   ins_encode %{
6351     int vector_len = 2;
6352     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6353   %}
6354   ins_pipe( pipe_slow );
6355 %}
6356 
6357 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
6358   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
6359   match(Set dst (AddVS src (LoadVector mem)));
6360   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
6361   ins_encode %{
6362     int vector_len = 2;
6363     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6364   %}
6365   ins_pipe( pipe_slow );
6366 %}
6367 
6368 // Integers vector add
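// Element sizes of 32 bits and up do not need the BW split used above:
// vpaddd/vpaddq (and the FP adds below) are covered by base AVX-512F, so the
// 512-bit rules are predicated simply on UseAVX > 2.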
6369 instruct vadd2I(vecD dst, vecD src) %{
6370   predicate(n->as_Vector()->length() == 2);
6371   match(Set dst (AddVI dst src));
6372   format %{ "paddd   $dst,$src\t! add packed2I" %}
6373   ins_encode %{
6374     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
6375   %}
6376   ins_pipe( pipe_slow );
6377 %}
6378 
6379 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
6380   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6381   match(Set dst (AddVI src1 src2));
6382   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
6383   ins_encode %{
6384     int vector_len = 0;
6385     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6386   %}
6387   ins_pipe( pipe_slow );
6388 %}
6389 
6390 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
6391   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6392   match(Set dst (AddVI src (LoadVector mem)));
6393   format %{ "vpaddd  $dst,$src,$mem\t! add packed2I" %}
6394   ins_encode %{
6395     int vector_len = 0;
6396     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6397   %}
6398   ins_pipe( pipe_slow );
6399 %}
6400 
6401 instruct vadd4I(vecX dst, vecX src) %{
6402   predicate(n->as_Vector()->length() == 4);
6403   match(Set dst (AddVI dst src));
6404   format %{ "paddd   $dst,$src\t! add packed4I" %}
6405   ins_encode %{
6406     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
6407   %}
6408   ins_pipe( pipe_slow );
6409 %}
6410 
6411 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
6412   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6413   match(Set dst (AddVI src1 src2));
6414   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
6415   ins_encode %{
6416     int vector_len = 0;
6417     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6418   %}
6419   ins_pipe( pipe_slow );
6420 %}
6421 
6422 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
6423   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6424   match(Set dst (AddVI src (LoadVector mem)));
6425   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
6426   ins_encode %{
6427     int vector_len = 0;
6428     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6429   %}
6430   ins_pipe( pipe_slow );
6431 %}
6432 
6433 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
6434   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6435   match(Set dst (AddVI src1 src2));
6436   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
6437   ins_encode %{
6438     int vector_len = 1;
6439     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6440   %}
6441   ins_pipe( pipe_slow );
6442 %}
6443 
6444 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
6445   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6446   match(Set dst (AddVI src (LoadVector mem)));
6447   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
6448   ins_encode %{
6449     int vector_len = 1;
6450     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6451   %}
6452   ins_pipe( pipe_slow );
6453 %}
6454 
6455 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
6456   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6457   match(Set dst (AddVI src1 src2));
6458   format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
6459   ins_encode %{
6460     int vector_len = 2;
6461     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6462   %}
6463   ins_pipe( pipe_slow );
6464 %}
6465 
6466 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
6467   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6468   match(Set dst (AddVI src (LoadVector mem)));
6469   format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
6470   ins_encode %{
6471     int vector_len = 2;
6472     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6473   %}
6474   ins_pipe( pipe_slow );
6475 %}
6476 
6477 // Longs vector add
6478 instruct vadd2L(vecX dst, vecX src) %{
6479   predicate(n->as_Vector()->length() == 2);
6480   match(Set dst (AddVL dst src));
6481   format %{ "paddq   $dst,$src\t! add packed2L" %}
6482   ins_encode %{
6483     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
6484   %}
6485   ins_pipe( pipe_slow );
6486 %}
6487 
6488 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
6489   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6490   match(Set dst (AddVL src1 src2));
6491   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
6492   ins_encode %{
6493     int vector_len = 0;
6494     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6495   %}
6496   ins_pipe( pipe_slow );
6497 %}
6498 
6499 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
6500   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6501   match(Set dst (AddVL src (LoadVector mem)));
6502   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
6503   ins_encode %{
6504     int vector_len = 0;
6505     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6506   %}
6507   ins_pipe( pipe_slow );
6508 %}
6509 
6510 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
6511   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
6512   match(Set dst (AddVL src1 src2));
6513   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
6514   ins_encode %{
6515     int vector_len = 1;
6516     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6517   %}
6518   ins_pipe( pipe_slow );
6519 %}
6520 
6521 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
6522   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
6523   match(Set dst (AddVL src (LoadVector mem)));
6524   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
6525   ins_encode %{
6526     int vector_len = 1;
6527     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6528   %}
6529   ins_pipe( pipe_slow );
6530 %}
6531 
6532 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
6533   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6534   match(Set dst (AddVL src1 src2));
6535   format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
6536   ins_encode %{
6537     int vector_len = 2;
6538     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6539   %}
6540   ins_pipe( pipe_slow );
6541 %}
6542 
6543 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
6544   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6545   match(Set dst (AddVL src (LoadVector mem)));
6546   format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
6547   ins_encode %{
6548     int vector_len = 2;
6549     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6550   %}
6551   ins_pipe( pipe_slow );
6552 %}
6553 
6554 // Floats vector add
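// Packed-single adds use addps (SSE) or vaddps (AVX).  Note that 256-bit FP
// arithmetic only needs AVX1, which is why vadd8F is predicated on
// UseAVX > 0 while the 256-bit integer forms above require UseAVX > 1 (AVX2).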
6555 instruct vadd2F(vecD dst, vecD src) %{
6556   predicate(n->as_Vector()->length() == 2);
6557   match(Set dst (AddVF dst src));
6558   format %{ "addps   $dst,$src\t! add packed2F" %}
6559   ins_encode %{
6560     __ addps($dst$$XMMRegister, $src$$XMMRegister);
6561   %}
6562   ins_pipe( pipe_slow );
6563 %}
6564 
6565 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
6566   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6567   match(Set dst (AddVF src1 src2));
6568   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
6569   ins_encode %{
6570     int vector_len = 0;
6571     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6572   %}
6573   ins_pipe( pipe_slow );
6574 %}
6575 
6576 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
6577   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6578   match(Set dst (AddVF src (LoadVector mem)));
6579   format %{ "vaddps  $dst,$src,$mem\t! add packed2F" %}
6580   ins_encode %{
6581     int vector_len = 0;
6582     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6583   %}
6584   ins_pipe( pipe_slow );
6585 %}
6586 
6587 instruct vadd4F(vecX dst, vecX src) %{
6588   predicate(n->as_Vector()->length() == 4);
6589   match(Set dst (AddVF dst src));
6590   format %{ "addps   $dst,$src\t! add packed4F" %}
6591   ins_encode %{
6592     __ addps($dst$$XMMRegister, $src$$XMMRegister);
6593   %}
6594   ins_pipe( pipe_slow );
6595 %}
6596 
6597 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
6598   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6599   match(Set dst (AddVF src1 src2));
6600   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
6601   ins_encode %{
6602     int vector_len = 0;
6603     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6604   %}
6605   ins_pipe( pipe_slow );
6606 %}
6607 
6608 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
6609   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6610   match(Set dst (AddVF src (LoadVector mem)));
6611   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
6612   ins_encode %{
6613     int vector_len = 0;
6614     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6615   %}
6616   ins_pipe( pipe_slow );
6617 %}
6618 
6619 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
6620   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6621   match(Set dst (AddVF src1 src2));
6622   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
6623   ins_encode %{
6624     int vector_len = 1;
6625     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6626   %}
6627   ins_pipe( pipe_slow );
6628 %}
6629 
6630 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
6631   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6632   match(Set dst (AddVF src (LoadVector mem)));
6633   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
6634   ins_encode %{
6635     int vector_len = 1;
6636     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6637   %}
6638   ins_pipe( pipe_slow );
6639 %}
6640 
6641 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6642   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6643   match(Set dst (AddVF src1 src2));
6644   format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
6645   ins_encode %{
6646     int vector_len = 2;
6647     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6648   %}
6649   ins_pipe( pipe_slow );
6650 %}
6651 
6652 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
6653   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6654   match(Set dst (AddVF src (LoadVector mem)));
6655   format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
6656   ins_encode %{
6657     int vector_len = 2;
6658     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6659   %}
6660   ins_pipe( pipe_slow );
6661 %}
6662 
6663 // Doubles vector add
6664 instruct vadd2D(vecX dst, vecX src) %{
6665   predicate(n->as_Vector()->length() == 2);
6666   match(Set dst (AddVD dst src));
6667   format %{ "addpd   $dst,$src\t! add packed2D" %}
6668   ins_encode %{
6669     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
6670   %}
6671   ins_pipe( pipe_slow );
6672 %}
6673 
6674 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
6675   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6676   match(Set dst (AddVD src1 src2));
6677   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
6678   ins_encode %{
6679     int vector_len = 0;
6680     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6681   %}
6682   ins_pipe( pipe_slow );
6683 %}
6684 
6685 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
6686   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6687   match(Set dst (AddVD src (LoadVector mem)));
6688   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
6689   ins_encode %{
6690     int vector_len = 0;
6691     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6692   %}
6693   ins_pipe( pipe_slow );
6694 %}
6695 
6696 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
6697   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6698   match(Set dst (AddVD src1 src2));
6699   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
6700   ins_encode %{
6701     int vector_len = 1;
6702     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6703   %}
6704   ins_pipe( pipe_slow );
6705 %}
6706 
6707 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
6708   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6709   match(Set dst (AddVD src (LoadVector mem)));
6710   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
6711   ins_encode %{
6712     int vector_len = 1;
6713     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6714   %}
6715   ins_pipe( pipe_slow );
6716 %}
6717 
6718 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6719   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6720   match(Set dst (AddVD src1 src2));
6721   format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
6722   ins_encode %{
6723     int vector_len = 2;
6724     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6725   %}
6726   ins_pipe( pipe_slow );
6727 %}
6728 
6729 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
6730   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6731   match(Set dst (AddVD src (LoadVector mem)));
6732   format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
6733   ins_encode %{
6734     int vector_len = 2;
6735     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6736   %}
6737   ins_pipe( pipe_slow );
6738 %}
6739 
6740 // --------------------------------- SUB --------------------------------------
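// The subtract rules below mirror the add rules above: the same predicate and
// operand scheme is used per element size, with psubb/psubw/psubd/psubq and
// subps/subpd (plus their vpsub*/vsub* AVX forms) in place of the adds.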
6741 
6742 // Bytes vector sub
6743 instruct vsub4B(vecS dst, vecS src) %{
6744   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
6745   match(Set dst (SubVB dst src));
6746   format %{ "psubb   $dst,$src\t! sub packed4B" %}
6747   ins_encode %{
6748     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
6749   %}
6750   ins_pipe( pipe_slow );
6751 %}
6752 
6753 instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
6754   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
6755   match(Set dst (SubVB src1 src2));
6756   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
6757   ins_encode %{
6758     int vector_len = 0;
6759     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6760   %}
6761   ins_pipe( pipe_slow );
6762 %}
6763 
6764 instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
6765   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
6766   match(Set dst (SubVB src1 src2));
6767   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
6768   ins_encode %{
6769     int vector_len = 0;
6770     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6771   %}
6772   ins_pipe( pipe_slow );
6773 %}
6774 
6775 instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
6776   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
6777   match(Set dst (SubVB dst src2));
6778   effect(TEMP src1);
6779   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
6780   ins_encode %{
6781     int vector_len = 0;
6782     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6783   %}
6784   ins_pipe( pipe_slow );
6785 %}
6786 
6787 instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
6788   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
6789   match(Set dst (SubVB src (LoadVector mem)));
6790   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
6791   ins_encode %{
6792     int vector_len = 0;
6793     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6794   %}
6795   ins_pipe( pipe_slow );
6796 %}
6797 
6798 instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
6799   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
6800   match(Set dst (SubVB src (LoadVector mem)));
6801   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
6802   ins_encode %{
6803     int vector_len = 0;
6804     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6805   %}
6806   ins_pipe( pipe_slow );
6807 %}
6808 
6809 instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
6810   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
6811   match(Set dst (SubVB dst (LoadVector mem)));
6812   effect(TEMP src);
6813   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
6814   ins_encode %{
6815     int vector_len = 0;
6816     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6817   %}
6818   ins_pipe( pipe_slow );
6819 %}
6820 
6821 instruct vsub8B(vecD dst, vecD src) %{
6822   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
6823   match(Set dst (SubVB dst src));
6824   format %{ "psubb   $dst,$src\t! sub packed8B" %}
6825   ins_encode %{
6826     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
6827   %}
6828   ins_pipe( pipe_slow );
6829 %}
6830 
6831 instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
6832   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
6833   match(Set dst (SubVB src1 src2));
6834   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
6835   ins_encode %{
6836     int vector_len = 0;
6837     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6838   %}
6839   ins_pipe( pipe_slow );
6840 %}
6841 
6842 instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
6843   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
6844   match(Set dst (SubVB src1 src2));
6845   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
6846   ins_encode %{
6847     int vector_len = 0;
6848     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6849   %}
6850   ins_pipe( pipe_slow );
6851 %}
6852 
6853 instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
6854   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
6855   match(Set dst (SubVB dst src2));
6856   effect(TEMP src1);
6857   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
6858   ins_encode %{
6859     int vector_len = 0;
6860     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6861   %}
6862   ins_pipe( pipe_slow );
6863 %}
6864 
6865 instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
6866   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
6867   match(Set dst (SubVB src (LoadVector mem)));
6868   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
6869   ins_encode %{
6870     int vector_len = 0;
6871     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6872   %}
6873   ins_pipe( pipe_slow );
6874 %}
6875 
6876 instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
6877   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
6878   match(Set dst (SubVB src (LoadVector mem)));
6879   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
6880   ins_encode %{
6881     int vector_len = 0;
6882     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6883   %}
6884   ins_pipe( pipe_slow );
6885 %}
6886 
6887 instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
6888   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
6889   match(Set dst (SubVB dst (LoadVector mem)));
6890   effect(TEMP src);
6891   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
6892   ins_encode %{
6893     int vector_len = 0;
6894     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6895   %}
6896   ins_pipe( pipe_slow );
6897 %}
6898 
6899 instruct vsub16B(vecX dst, vecX src) %{
6900   predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
6901   match(Set dst (SubVB dst src));
6902   format %{ "psubb   $dst,$src\t! sub packed16B" %}
6903   ins_encode %{
6904     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
6905   %}
6906   ins_pipe( pipe_slow );
6907 %}
6908 
6909 instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
6910   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
6911   match(Set dst (SubVB src1 src2));
6912   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
6913   ins_encode %{
6914     int vector_len = 0;
6915     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6916   %}
6917   ins_pipe( pipe_slow );
6918 %}
6919 
6920 instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
6921   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
6922   match(Set dst (SubVB src1 src2));
6923   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
6924   ins_encode %{
6925     int vector_len = 0;
6926     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6927   %}
6928   ins_pipe( pipe_slow );
6929 %}
6930 
6931 instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
6932   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
6933   match(Set dst (SubVB dst src2));
6934   effect(TEMP src1);
6935   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
6936   ins_encode %{
6937     int vector_len = 0;
6938     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6939   %}
6940   ins_pipe( pipe_slow );
6941 %}
6942 
6943 instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
6944   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
6945   match(Set dst (SubVB src (LoadVector mem)));
6946   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
6947   ins_encode %{
6948     int vector_len = 0;
6949     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6950   %}
6951   ins_pipe( pipe_slow );
6952 %}
6953 
6954 instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
6955   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
6956   match(Set dst (SubVB src (LoadVector mem)));
6957   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
6958   ins_encode %{
6959     int vector_len = 0;
6960     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6961   %}
6962   ins_pipe( pipe_slow );
6963 %}
6964 
6965 instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
6966   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
6967   match(Set dst (SubVB dst (LoadVector mem)));
6968   effect(TEMP src);
6969   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
6970   ins_encode %{
6971     int vector_len = 0;
6972     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6973   %}
6974   ins_pipe( pipe_slow );
6975 %}
6976 
6977 instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
6978   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
6979   match(Set dst (SubVB src1 src2));
6980   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
6981   ins_encode %{
6982     int vector_len = 1;
6983     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6984   %}
6985   ins_pipe( pipe_slow );
6986 %}
6987 
6988 instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
6989   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
6990   match(Set dst (SubVB src1 src2));
6991   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
6992   ins_encode %{
6993     int vector_len = 1;
6994     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6995   %}
6996   ins_pipe( pipe_slow );
6997 %}
6998 
6999 instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
7000   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
7001   match(Set dst (SubVB dst src2));
7002   effect(TEMP src1);
7003   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
7004   ins_encode %{
7005     int vector_len = 1;
7006     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7007   %}
7008   ins_pipe( pipe_slow );
7009 %}
7010 
7011 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
7012   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
7013   match(Set dst (SubVB src (LoadVector mem)));
7014   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
7015   ins_encode %{
7016     int vector_len = 1;
7017     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7018   %}
7019   ins_pipe( pipe_slow );
7020 %}
7021 
7022 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
7023   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
7024   match(Set dst (SubVB src (LoadVector mem)));
7025   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
7026   ins_encode %{
7027     int vector_len = 1;
7028     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7029   %}
7030   ins_pipe( pipe_slow );
7031 %}
7032 
7033 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
7034   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
7035   match(Set dst (SubVB dst (LoadVector mem)));
7036   effect(TEMP src);
7037   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
7038   ins_encode %{
7039     int vector_len = 1;
7040     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7041   %}
7042   ins_pipe( pipe_slow );
7043 %}
7044 
7045 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
7046   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
7047   match(Set dst (SubVB src1 src2));
7048   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
7049   ins_encode %{
7050     int vector_len = 2;
7051     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7052   %}
7053   ins_pipe( pipe_slow );
7054 %}
7055 
7056 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
7057   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
7058   match(Set dst (SubVB src (LoadVector mem)));
7059   format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
7060   ins_encode %{
7061     int vector_len = 2;
7062     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7063   %}
7064   ins_pipe( pipe_slow );
7065 %}
7066 
7067 // Shorts/Chars vector sub
7068 instruct vsub2S(vecS dst, vecS src) %{
7069   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
7070   match(Set dst (SubVS dst src));
7071   format %{ "psubw   $dst,$src\t! sub packed2S" %}
7072   ins_encode %{
7073     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
7074   %}
7075   ins_pipe( pipe_slow );
7076 %}
7077 
7078 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
7079   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
7080   match(Set dst (SubVS src1 src2));
7081   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
7082   ins_encode %{
7083     int vector_len = 0;
7084     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7085   %}
7086   ins_pipe( pipe_slow );
7087 %}
7088 
7089 instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
7090   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
7091   match(Set dst (SubVS src1 src2));
7092   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
7093   ins_encode %{
7094     int vector_len = 0;
7095     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7096   %}
7097   ins_pipe( pipe_slow );
7098 %}
7099 
7100 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
7101   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
7102   match(Set dst (SubVS dst src2));
7103   effect(TEMP src1);
7104   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
7105   ins_encode %{
7106     int vector_len = 0;
7107     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7108   %}
7109   ins_pipe( pipe_slow );
7110 %}
7111 
7112 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
7113   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
7114   match(Set dst (SubVS src (LoadVector mem)));
7115   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
7116   ins_encode %{
7117     int vector_len = 0;
7118     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7119   %}
7120   ins_pipe( pipe_slow );
7121 %}
7122 
7123 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
7124   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
7125   match(Set dst (SubVS src (LoadVector mem)));
7126   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
7127   ins_encode %{
7128     int vector_len = 0;
7129     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7130   %}
7131   ins_pipe( pipe_slow );
7132 %}
7133 
7134 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
7135   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
7136   match(Set dst (SubVS dst (LoadVector mem)));
7137   effect(TEMP src);
7138   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
7139   ins_encode %{
7140     int vector_len = 0;
7141     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7142   %}
7143   ins_pipe( pipe_slow );
7144 %}
7145 
7146 instruct vsub4S(vecD dst, vecD src) %{
7147   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
7148   match(Set dst (SubVS dst src));
7149   format %{ "psubw   $dst,$src\t! sub packed4S" %}
7150   ins_encode %{
7151     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
7152   %}
7153   ins_pipe( pipe_slow );
7154 %}
7155 
7156 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
7157   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
7158   match(Set dst (SubVS src1 src2));
7159   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
7160   ins_encode %{
7161     int vector_len = 0;
7162     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7163   %}
7164   ins_pipe( pipe_slow );
7165 %}
7166 
7167 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
7168   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
7169   match(Set dst (SubVS src1 src2));
7170   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
7171   ins_encode %{
7172     int vector_len = 0;
7173     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7174   %}
7175   ins_pipe( pipe_slow );
7176 %}
7177 
7178 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
7179   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
7180   match(Set dst (SubVS dst src2));
7181   effect(TEMP src1);
7182   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
7183   ins_encode %{
7184     int vector_len = 0;
7185     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7186   %}
7187   ins_pipe( pipe_slow );
7188 %}
7189 
7190 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
7191   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
7192   match(Set dst (SubVS src (LoadVector mem)));
7193   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
7194   ins_encode %{
7195     int vector_len = 0;
7196     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7197   %}
7198   ins_pipe( pipe_slow );
7199 %}
7200 
7201 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
7202   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
7203   match(Set dst (SubVS src (LoadVector mem)));
7204   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
7205   ins_encode %{
7206     int vector_len = 0;
7207     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7208   %}
7209   ins_pipe( pipe_slow );
7210 %}
7211 
7212 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
7213   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
7214   match(Set dst (SubVS dst (LoadVector mem)));
7215   effect(TEMP src);
7216   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
7217   ins_encode %{
7218     int vector_len = 0;
7219     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7220   %}
7221   ins_pipe( pipe_slow );
7222 %}
7223 
7224 instruct vsub8S(vecX dst, vecX src) %{
7225   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
7226   match(Set dst (SubVS dst src));
7227   format %{ "psubw   $dst,$src\t! sub packed8S" %}
7228   ins_encode %{
7229     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
7230   %}
7231   ins_pipe( pipe_slow );
7232 %}
7233 
7234 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
7235   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
7236   match(Set dst (SubVS src1 src2));
7237   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
7238   ins_encode %{
7239     int vector_len = 0;
7240     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7241   %}
7242   ins_pipe( pipe_slow );
7243 %}
7244 
7245 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
7246   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
7247   match(Set dst (SubVS src1 src2));
7248   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
7249   ins_encode %{
7250     int vector_len = 0;
7251     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7252   %}
7253   ins_pipe( pipe_slow );
7254 %}
7255 
7256 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
7257   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
7258   match(Set dst (SubVS dst src2));
7259   effect(TEMP src1);
7260   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
7261   ins_encode %{
7262     int vector_len = 0;
7263     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7264   %}
7265   ins_pipe( pipe_slow );
7266 %}
7267 
7268 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
7269   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
7270   match(Set dst (SubVS src (LoadVector mem)));
7271   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
7272   ins_encode %{
7273     int vector_len = 0;
7274     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7275   %}
7276   ins_pipe( pipe_slow );
7277 %}
7278 
7279 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
7280   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
7281   match(Set dst (SubVS src (LoadVector mem)));
7282   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
7283   ins_encode %{
7284     int vector_len = 0;
7285     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7286   %}
7287   ins_pipe( pipe_slow );
7288 %}
7289 
7290 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
7291   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
7292   match(Set dst (SubVS dst (LoadVector mem)));
7293   effect(TEMP src);
7294   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
7295   ins_encode %{
7296     int vector_len = 0;
7297     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7298   %}
7299   ins_pipe( pipe_slow );
7300 %}
7301 
7302 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
7303   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
7304   match(Set dst (SubVS src1 src2));
7305   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
7306   ins_encode %{
7307     int vector_len = 1;
7308     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7309   %}
7310   ins_pipe( pipe_slow );
7311 %}
7312 
7313 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
7314   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
7315   match(Set dst (SubVS src1 src2));
7316   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
7317   ins_encode %{
7318     int vector_len = 1;
7319     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7320   %}
7321   ins_pipe( pipe_slow );
7322 %}
7323 
7324 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
7325   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
7326   match(Set dst (SubVS dst src2));
7327   effect(TEMP src1);
7328   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
7329   ins_encode %{
7330     int vector_len = 1;
7331     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7332   %}
7333   ins_pipe( pipe_slow );
7334 %}
7335 
7336 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
7337   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
7338   match(Set dst (SubVS src (LoadVector mem)));
7339   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
7340   ins_encode %{
7341     int vector_len = 1;
7342     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7343   %}
7344   ins_pipe( pipe_slow );
7345 %}
7346 
7347 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
7348   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
7349   match(Set dst (SubVS src (LoadVector mem)));
7350   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
7351   ins_encode %{
7352     int vector_len = 1;
7353     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7354   %}
7355   ins_pipe( pipe_slow );
7356 %}
7357 
7358 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
7359   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
7360   match(Set dst (SubVS dst (LoadVector mem)));
7361   effect(TEMP src);
7362   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
7363   ins_encode %{
7364     int vector_len = 1;
7365     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7366   %}
7367   ins_pipe( pipe_slow );
7368 %}
7369 
7370 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
7371   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
7372   match(Set dst (SubVS src1 src2));
7373   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
7374   ins_encode %{
7375     int vector_len = 2;
7376     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7377   %}
7378   ins_pipe( pipe_slow );
7379 %}
7380 
7381 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
7382   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
7383   match(Set dst (SubVS src (LoadVector mem)));
7384   format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
7385   ins_encode %{
7386     int vector_len = 2;
7387     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7388   %}
7389   ins_pipe( pipe_slow );
7390 %}
7391 
7392 // Integers vector sub
7393 instruct vsub2I(vecD dst, vecD src) %{
7394   predicate(n->as_Vector()->length() == 2);
7395   match(Set dst (SubVI dst src));
7396   format %{ "psubd   $dst,$src\t! sub packed2I" %}
7397   ins_encode %{
7398     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
7399   %}
7400   ins_pipe( pipe_slow );
7401 %}
7402 
7403 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
7404   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7405   match(Set dst (SubVI src1 src2));
7406   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
7407   ins_encode %{
7408     int vector_len = 0;
7409     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7410   %}
7411   ins_pipe( pipe_slow );
7412 %}
7413 
7414 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
7415   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7416   match(Set dst (SubVI src (LoadVector mem)));
7417   format %{ "vpsubd  $dst,$src,$mem\t! sub packed2I" %}
7418   ins_encode %{
7419     int vector_len = 0;
7420     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7421   %}
7422   ins_pipe( pipe_slow );
7423 %}
7424 
7425 instruct vsub4I(vecX dst, vecX src) %{
7426   predicate(n->as_Vector()->length() == 4);
7427   match(Set dst (SubVI dst src));
7428   format %{ "psubd   $dst,$src\t! sub packed4I" %}
7429   ins_encode %{
7430     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
7431   %}
7432   ins_pipe( pipe_slow );
7433 %}
7434 
7435 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
7436   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7437   match(Set dst (SubVI src1 src2));
7438   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
7439   ins_encode %{
7440     int vector_len = 0;
7441     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7442   %}
7443   ins_pipe( pipe_slow );
7444 %}
7445 
7446 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
7447   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7448   match(Set dst (SubVI src (LoadVector mem)));
7449   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
7450   ins_encode %{
7451     int vector_len = 0;
7452     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7453   %}
7454   ins_pipe( pipe_slow );
7455 %}
7456 
7457 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
7458   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7459   match(Set dst (SubVI src1 src2));
7460   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
7461   ins_encode %{
7462     int vector_len = 1;
7463     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7464   %}
7465   ins_pipe( pipe_slow );
7466 %}
7467 
7468 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
7469   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7470   match(Set dst (SubVI src (LoadVector mem)));
7471   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
7472   ins_encode %{
7473     int vector_len = 1;
7474     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7475   %}
7476   ins_pipe( pipe_slow );
7477 %}
7478 
7479 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
7480   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7481   match(Set dst (SubVI src1 src2));
7482   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed16I" %}
7483   ins_encode %{
7484     int vector_len = 2;
7485     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7486   %}
7487   ins_pipe( pipe_slow );
7488 %}
7489 
7490 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
7491   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7492   match(Set dst (SubVI src (LoadVector mem)));
7493   format %{ "vpsubd  $dst,$src,$mem\t! sub packed16I" %}
7494   ins_encode %{
7495     int vector_len = 2;
7496     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7497   %}
7498   ins_pipe( pipe_slow );
7499 %}
7500 
7501 // Longs vector sub
7502 instruct vsub2L(vecX dst, vecX src) %{
7503   predicate(n->as_Vector()->length() == 2);
7504   match(Set dst (SubVL dst src));
7505   format %{ "psubq   $dst,$src\t! sub packed2L" %}
7506   ins_encode %{
7507     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
7508   %}
7509   ins_pipe( pipe_slow );
7510 %}
7511 
7512 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
7513   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7514   match(Set dst (SubVL src1 src2));
7515   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
7516   ins_encode %{
7517     int vector_len = 0;
7518     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7519   %}
7520   ins_pipe( pipe_slow );
7521 %}
7522 
7523 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
7524   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7525   match(Set dst (SubVL src (LoadVector mem)));
7526   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
7527   ins_encode %{
7528     int vector_len = 0;
7529     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7530   %}
7531   ins_pipe( pipe_slow );
7532 %}
7533 
7534 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
7535   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7536   match(Set dst (SubVL src1 src2));
7537   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
7538   ins_encode %{
7539     int vector_len = 1;
7540     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7541   %}
7542   ins_pipe( pipe_slow );
7543 %}
7544 
7545 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
7546   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7547   match(Set dst (SubVL src (LoadVector mem)));
7548   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
7549   ins_encode %{
7550     int vector_len = 1;
7551     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7552   %}
7553   ins_pipe( pipe_slow );
7554 %}
7555 
7556 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
7557   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7558   match(Set dst (SubVL src1 src2));
7559   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed8L" %}
7560   ins_encode %{
7561     int vector_len = 2;
7562     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7563   %}
7564   ins_pipe( pipe_slow );
7565 %}
7566 
7567 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
7568   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7569   match(Set dst (SubVL src (LoadVector mem)));
7570   format %{ "vpsubq  $dst,$src,$mem\t! sub packed8L" %}
7571   ins_encode %{
7572     int vector_len = 2;
7573     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7574   %}
7575   ins_pipe( pipe_slow );
7576 %}
7577 
7578 // Floats vector sub
7579 instruct vsub2F(vecD dst, vecD src) %{
7580   predicate(n->as_Vector()->length() == 2);
7581   match(Set dst (SubVF dst src));
7582   format %{ "subps   $dst,$src\t! sub packed2F" %}
7583   ins_encode %{
7584     __ subps($dst$$XMMRegister, $src$$XMMRegister);
7585   %}
7586   ins_pipe( pipe_slow );
7587 %}
7588 
7589 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
7590   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7591   match(Set dst (SubVF src1 src2));
7592   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
7593   ins_encode %{
7594     int vector_len = 0;
7595     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7596   %}
7597   ins_pipe( pipe_slow );
7598 %}
7599 
7600 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
7601   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7602   match(Set dst (SubVF src (LoadVector mem)));
7603   format %{ "vsubps  $dst,$src,$mem\t! sub packed2F" %}
7604   ins_encode %{
7605     int vector_len = 0;
7606     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7607   %}
7608   ins_pipe( pipe_slow );
7609 %}
7610 
7611 instruct vsub4F(vecX dst, vecX src) %{
7612   predicate(n->as_Vector()->length() == 4);
7613   match(Set dst (SubVF dst src));
7614   format %{ "subps   $dst,$src\t! sub packed4F" %}
7615   ins_encode %{
7616     __ subps($dst$$XMMRegister, $src$$XMMRegister);
7617   %}
7618   ins_pipe( pipe_slow );
7619 %}
7620 
7621 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
7622   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7623   match(Set dst (SubVF src1 src2));
7624   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
7625   ins_encode %{
7626     int vector_len = 0;
7627     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7628   %}
7629   ins_pipe( pipe_slow );
7630 %}
7631 
7632 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
7633   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7634   match(Set dst (SubVF src (LoadVector mem)));
7635   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
7636   ins_encode %{
7637     int vector_len = 0;
7638     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7639   %}
7640   ins_pipe( pipe_slow );
7641 %}
7642 
7643 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
7644   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7645   match(Set dst (SubVF src1 src2));
7646   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
7647   ins_encode %{
7648     int vector_len = 1;
7649     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7650   %}
7651   ins_pipe( pipe_slow );
7652 %}
7653 
7654 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
7655   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7656   match(Set dst (SubVF src (LoadVector mem)));
7657   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
7658   ins_encode %{
7659     int vector_len = 1;
7660     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7661   %}
7662   ins_pipe( pipe_slow );
7663 %}
7664 
7665 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
7666   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7667   match(Set dst (SubVF src1 src2));
7668   format %{ "vsubps  $dst,$src1,$src2\t! sub packed16F" %}
7669   ins_encode %{
7670     int vector_len = 2;
7671     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7672   %}
7673   ins_pipe( pipe_slow );
7674 %}
7675 
7676 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
7677   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7678   match(Set dst (SubVF src (LoadVector mem)));
7679   format %{ "vsubps  $dst,$src,$mem\t! sub packed16F" %}
7680   ins_encode %{
7681     int vector_len = 2;
7682     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7683   %}
7684   ins_pipe( pipe_slow );
7685 %}
7686 
7687 // Doubles vector sub
7688 instruct vsub2D(vecX dst, vecX src) %{
7689   predicate(n->as_Vector()->length() == 2);
7690   match(Set dst (SubVD dst src));
7691   format %{ "subpd   $dst,$src\t! sub packed2D" %}
7692   ins_encode %{
7693     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
7694   %}
7695   ins_pipe( pipe_slow );
7696 %}
7697 
7698 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
7699   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7700   match(Set dst (SubVD src1 src2));
7701   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
7702   ins_encode %{
7703     int vector_len = 0;
7704     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7705   %}
7706   ins_pipe( pipe_slow );
7707 %}
7708 
7709 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
7710   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7711   match(Set dst (SubVD src (LoadVector mem)));
7712   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
7713   ins_encode %{
7714     int vector_len = 0;
7715     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7716   %}
7717   ins_pipe( pipe_slow );
7718 %}
7719 
7720 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
7721   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7722   match(Set dst (SubVD src1 src2));
7723   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
7724   ins_encode %{
7725     int vector_len = 1;
7726     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7727   %}
7728   ins_pipe( pipe_slow );
7729 %}
7730 
7731 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
7732   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7733   match(Set dst (SubVD src (LoadVector mem)));
7734   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
7735   ins_encode %{
7736     int vector_len = 1;
7737     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7738   %}
7739   ins_pipe( pipe_slow );
7740 %}
7741 
7742 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
7743   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7744   match(Set dst (SubVD src1 src2));
7745   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed8D" %}
7746   ins_encode %{
7747     int vector_len = 2;
7748     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7749   %}
7750   ins_pipe( pipe_slow );
7751 %}
7752 
7753 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
7754   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7755   match(Set dst (SubVD src (LoadVector mem)));
7756   format %{ "vsubpd  $dst,$src,$mem\t! sub packed8D" %}
7757   ins_encode %{
7758     int vector_len = 2;
7759     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7760   %}
7761   ins_pipe( pipe_slow );
7762 %}
7763 
7764 // --------------------------------- MUL --------------------------------------
7765 
7766 // Shorts/Chars vector mul
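     // The 2S/4S/8S/16S rules below come in three flavors:
     //   *_avx           - VEX-encoded vpmullw, selected with supports_avxonly()
     //                     (AVX or AVX2, no AVX-512),
     //   *_evex          - EVEX-encoded vpmullw, selected when AVX512BW is
     //                     available,
     //   *_evex_special  - used on AVX-512 targets without AVX512BW; these match
     //                     the read-modify-write form (dst is also an input) and
     //                     declare an extra TEMP operand.
     // The 32S rules require AVX512BW.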
7767 instruct vmul2S(vecS dst, vecS src) %{
7768   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
7769   match(Set dst (MulVS dst src));
7770   format %{ "pmullw $dst,$src\t! mul packed2S" %}
7771   ins_encode %{
7772     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
7773   %}
7774   ins_pipe( pipe_slow );
7775 %}
7776 
7777 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
7778   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
7779   match(Set dst (MulVS src1 src2));
7780   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
7781   ins_encode %{
7782     int vector_len = 0;
7783     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7784   %}
7785   ins_pipe( pipe_slow );
7786 %}
7787 
7788 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
7789   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
7790   match(Set dst (MulVS src1 src2));
7791   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
7792   ins_encode %{
7793     int vector_len = 0;
7794     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7795   %}
7796   ins_pipe( pipe_slow );
7797 %}
7798 
7799 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
7800   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
7801   match(Set dst (MulVS dst src2));
7802   effect(TEMP src1);
7803   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
7804   ins_encode %{
7805     int vector_len = 0;
7806     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7807   %}
7808   ins_pipe( pipe_slow );
7809 %}
7810 
7811 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
7812   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
7813   match(Set dst (MulVS src (LoadVector mem)));
7814   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
7815   ins_encode %{
7816     int vector_len = 0;
7817     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7818   %}
7819   ins_pipe( pipe_slow );
7820 %}
7821 
7822 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
7823   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
7824   match(Set dst (MulVS src (LoadVector mem)));
7825   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
7826   ins_encode %{
7827     int vector_len = 0;
7828     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7829   %}
7830   ins_pipe( pipe_slow );
7831 %}
7832 
7833 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
7834   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
7835   match(Set dst (MulVS dst (LoadVector mem)));
7836   effect(TEMP src);
7837   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
7838   ins_encode %{
7839     int vector_len = 0;
7840     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7841   %}
7842   ins_pipe( pipe_slow );
7843 %}
7844 
7845 instruct vmul4S(vecD dst, vecD src) %{
7846   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
7847   match(Set dst (MulVS dst src));
7848   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
7849   ins_encode %{
7850     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
7851   %}
7852   ins_pipe( pipe_slow );
7853 %}
7854 
7855 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
7856   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
7857   match(Set dst (MulVS src1 src2));
7858   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
7859   ins_encode %{
7860     int vector_len = 0;
7861     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7862   %}
7863   ins_pipe( pipe_slow );
7864 %}
7865 
7866 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
7867   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
7868   match(Set dst (MulVS src1 src2));
7869   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
7870   ins_encode %{
7871     int vector_len = 0;
7872     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7873   %}
7874   ins_pipe( pipe_slow );
7875 %}
7876 
7877 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
7878   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
7879   match(Set dst (MulVS dst src2));
7880   effect(TEMP src1);
7881   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
7882   ins_encode %{
7883     int vector_len = 0;
7884     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7885   %}
7886   ins_pipe( pipe_slow );
7887 %}
7888 
7889 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
7890   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
7891   match(Set dst (MulVS src (LoadVector mem)));
7892   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
7893   ins_encode %{
7894     int vector_len = 0;
7895     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7896   %}
7897   ins_pipe( pipe_slow );
7898 %}
7899 
7900 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
7901   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
7902   match(Set dst (MulVS src (LoadVector mem)));
7903   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
7904   ins_encode %{
7905     int vector_len = 0;
7906     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7907   %}
7908   ins_pipe( pipe_slow );
7909 %}
7910 
7911 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
7912   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
7913   match(Set dst (MulVS dst (LoadVector mem)));
7914   effect(TEMP src);
7915   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
7916   ins_encode %{
7917     int vector_len = 0;
7918     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7919   %}
7920   ins_pipe( pipe_slow );
7921 %}
7922 
7923 instruct vmul8S(vecX dst, vecX src) %{
7924   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
7925   match(Set dst (MulVS dst src));
7926   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
7927   ins_encode %{
7928     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
7929   %}
7930   ins_pipe( pipe_slow );
7931 %}
7932 
7933 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
7934   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
7935   match(Set dst (MulVS src1 src2));
7936   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
7937   ins_encode %{
7938     int vector_len = 0;
7939     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7940   %}
7941   ins_pipe( pipe_slow );
7942 %}
7943 
7944 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
7945   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
7946   match(Set dst (MulVS src1 src2));
7947   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
7948   ins_encode %{
7949     int vector_len = 0;
7950     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7951   %}
7952   ins_pipe( pipe_slow );
7953 %}
7954 
7955 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
7956   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
7957   match(Set dst (MulVS dst src2));
7958   effect(TEMP src1);
7959   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
7960   ins_encode %{
7961     int vector_len = 0;
7962     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7963   %}
7964   ins_pipe( pipe_slow );
7965 %}
7966 
7967 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
7968   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
7969   match(Set dst (MulVS src (LoadVector mem)));
7970   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
7971   ins_encode %{
7972     int vector_len = 0;
7973     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7974   %}
7975   ins_pipe( pipe_slow );
7976 %}
7977 
7978 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
7979   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
7980   match(Set dst (MulVS src (LoadVector mem)));
7981   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
7982   ins_encode %{
7983     int vector_len = 0;
7984     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7985   %}
7986   ins_pipe( pipe_slow );
7987 %}
7988 
7989 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
7990   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
7991   match(Set dst (MulVS dst (LoadVector mem)));
7992   effect(TEMP src);
7993   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
7994   ins_encode %{
7995     int vector_len = 0;
7996     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7997   %}
7998   ins_pipe( pipe_slow );
7999 %}
8000 
8001 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
8002   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
8003   match(Set dst (MulVS src1 src2));
8004   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
8005   ins_encode %{
8006     int vector_len = 1;
8007     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8008   %}
8009   ins_pipe( pipe_slow );
8010 %}
8011 
8012 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
8013   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
8014   match(Set dst (MulVS src1 src2));
8015   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
8016   ins_encode %{
8017     int vector_len = 1;
8018     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8019   %}
8020   ins_pipe( pipe_slow );
8021 %}
8022 
8023 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
8024   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
8025   match(Set dst (MulVS dst src2));
8026   effect(TEMP src1);
8027   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
8028   ins_encode %{
8029     int vector_len = 1;
8030     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8031   %}
8032   ins_pipe( pipe_slow );
8033 %}
8034 
8035 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
8036   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
8037   match(Set dst (MulVS src (LoadVector mem)));
8038   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
8039   ins_encode %{
8040     int vector_len = 1;
8041     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8042   %}
8043   ins_pipe( pipe_slow );
8044 %}
8045 
8046 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
8047   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
8048   match(Set dst (MulVS src (LoadVector mem)));
8049   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
8050   ins_encode %{
8051     int vector_len = 1;
8052     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8053   %}
8054   ins_pipe( pipe_slow );
8055 %}
8056 
8057 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
8058   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
8059   match(Set dst (MulVS dst (LoadVector mem)));
8060   effect(TEMP src);
8061   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
8062   ins_encode %{
8063     int vector_len = 1;
8064     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8065   %}
8066   ins_pipe( pipe_slow );
8067 %}
8068 
8069 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
8070   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8071   match(Set dst (MulVS src1 src2));
8072   format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
8073   ins_encode %{
8074     int vector_len = 2;
8075     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8076   %}
8077   ins_pipe( pipe_slow );
8078 %}
8079 
8080 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
8081   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8082   match(Set dst (MulVS src (LoadVector mem)));
8083   format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
8084   ins_encode %{
8085     int vector_len = 2;
8086     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8087   %}
8088   ins_pipe( pipe_slow );
8089 %}
8090 
8091 // Integers vector mul (sse4_1)
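     // pmulld is an SSE4.1 instruction, hence the UseSSE > 3 predicate on the
     // non-AVX rules below.  The packed long (MulVL) rules further down use
     // vpmullq, which additionally requires AVX512DQ.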
8092 instruct vmul2I(vecD dst, vecD src) %{
8093   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
8094   match(Set dst (MulVI dst src));
8095   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
8096   ins_encode %{
8097     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
8098   %}
8099   ins_pipe( pipe_slow );
8100 %}
8101 
8102 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
8103   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8104   match(Set dst (MulVI src1 src2));
8105   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
8106   ins_encode %{
8107     int vector_len = 0;
8108     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8109   %}
8110   ins_pipe( pipe_slow );
8111 %}
8112 
8113 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
8114   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8115   match(Set dst (MulVI src (LoadVector mem)));
8116   format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
8117   ins_encode %{
8118     int vector_len = 0;
8119     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8120   %}
8121   ins_pipe( pipe_slow );
8122 %}
8123 
8124 instruct vmul4I(vecX dst, vecX src) %{
8125   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
8126   match(Set dst (MulVI dst src));
8127   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
8128   ins_encode %{
8129     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
8130   %}
8131   ins_pipe( pipe_slow );
8132 %}
8133 
8134 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
8135   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8136   match(Set dst (MulVI src1 src2));
8137   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
8138   ins_encode %{
8139     int vector_len = 0;
8140     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8141   %}
8142   ins_pipe( pipe_slow );
8143 %}
8144 
8145 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
8146   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8147   match(Set dst (MulVI src (LoadVector mem)));
8148   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
8149   ins_encode %{
8150     int vector_len = 0;
8151     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8152   %}
8153   ins_pipe( pipe_slow );
8154 %}
8155 
8156 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
8157   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
8158   match(Set dst (MulVL src1 src2));
8159   format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
8160   ins_encode %{
8161     int vector_len = 0;
8162     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8163   %}
8164   ins_pipe( pipe_slow );
8165 %}
8166 
8167 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
8168   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
8169   match(Set dst (MulVL src (LoadVector mem)));
8170   format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
8171   ins_encode %{
8172     int vector_len = 0;
8173     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8174   %}
8175   ins_pipe( pipe_slow );
8176 %}
8177 
8178 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
8179   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
8180   match(Set dst (MulVL src1 src2));
8181   format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
8182   ins_encode %{
8183     int vector_len = 1;
8184     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8185   %}
8186   ins_pipe( pipe_slow );
8187 %}
8188 
8189 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
8190   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
8191   match(Set dst (MulVL src (LoadVector mem)));
8192   format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
8193   ins_encode %{
8194     int vector_len = 1;
8195     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8196   %}
8197   ins_pipe( pipe_slow );
8198 %}
8199 
8200 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
8201   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
8202   match(Set dst (MulVL src1 src2));
8203   format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
8204   ins_encode %{
8205     int vector_len = 2;
8206     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8207   %}
8208   ins_pipe( pipe_slow );
8209 %}
8210 
8211 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
8212   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
8213   match(Set dst (MulVL src (LoadVector mem)));
8214   format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
8215   ins_encode %{
8216     int vector_len = 2;
8217     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8218   %}
8219   ins_pipe( pipe_slow );
8220 %}
8221 
8222 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
8223   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8224   match(Set dst (MulVI src1 src2));
8225   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
8226   ins_encode %{
8227     int vector_len = 1;
8228     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8229   %}
8230   ins_pipe( pipe_slow );
8231 %}
8232 
8233 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
8234   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8235   match(Set dst (MulVI src (LoadVector mem)));
8236   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
8237   ins_encode %{
8238     int vector_len = 1;
8239     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8240   %}
8241   ins_pipe( pipe_slow );
8242 %}
8243 
8244 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
8245   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8246   match(Set dst (MulVI src1 src2));
8247   format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
8248   ins_encode %{
8249     int vector_len = 2;
8250     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8251   %}
8252   ins_pipe( pipe_slow );
8253 %}
8254 
8255 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
8256   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8257   match(Set dst (MulVI src (LoadVector mem)));
8258   format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
8259   ins_encode %{
8260     int vector_len = 2;
8261     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8262   %}
8263   ins_pipe( pipe_slow );
8264 %}
8265 
8266 // Floats vector mul
8267 instruct vmul2F(vecD dst, vecD src) %{
8268   predicate(n->as_Vector()->length() == 2);
8269   match(Set dst (MulVF dst src));
8270   format %{ "mulps   $dst,$src\t! mul packed2F" %}
8271   ins_encode %{
8272     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
8273   %}
8274   ins_pipe( pipe_slow );
8275 %}
8276 
8277 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
8278   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8279   match(Set dst (MulVF src1 src2));
8280   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
8281   ins_encode %{
8282     int vector_len = 0;
8283     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8284   %}
8285   ins_pipe( pipe_slow );
8286 %}
8287 
8288 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
8289   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8290   match(Set dst (MulVF src (LoadVector mem)));
8291   format %{ "vmulps  $dst,$src,$mem\t! mul packed2F" %}
8292   ins_encode %{
8293     int vector_len = 0;
8294     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8295   %}
8296   ins_pipe( pipe_slow );
8297 %}
8298 
8299 instruct vmul4F(vecX dst, vecX src) %{
8300   predicate(n->as_Vector()->length() == 4);
8301   match(Set dst (MulVF dst src));
8302   format %{ "mulps   $dst,$src\t! mul packed4F" %}
8303   ins_encode %{
8304     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
8305   %}
8306   ins_pipe( pipe_slow );
8307 %}
8308 
8309 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
8310   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8311   match(Set dst (MulVF src1 src2));
8312   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
8313   ins_encode %{
8314     int vector_len = 0;
8315     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8316   %}
8317   ins_pipe( pipe_slow );
8318 %}
8319 
8320 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
8321   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8322   match(Set dst (MulVF src (LoadVector mem)));
8323   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
8324   ins_encode %{
8325     int vector_len = 0;
8326     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8327   %}
8328   ins_pipe( pipe_slow );
8329 %}
8330 
8331 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
8332   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8333   match(Set dst (MulVF src1 src2));
8334   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
8335   ins_encode %{
8336     int vector_len = 1;
8337     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8338   %}
8339   ins_pipe( pipe_slow );
8340 %}
8341 
8342 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
8343   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8344   match(Set dst (MulVF src (LoadVector mem)));
8345   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
8346   ins_encode %{
8347     int vector_len = 1;
8348     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8349   %}
8350   ins_pipe( pipe_slow );
8351 %}
8352 
8353 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
8354   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8355   match(Set dst (MulVF src1 src2));
8356   format %{ "vmulps  $dst,$src1,$src2\t! mul packed16F" %}
8357   ins_encode %{
8358     int vector_len = 2;
8359     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8360   %}
8361   ins_pipe( pipe_slow );
8362 %}
8363 
8364 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
8365   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8366   match(Set dst (MulVF src (LoadVector mem)));
8367   format %{ "vmulps  $dst,$src,$mem\t! mul packed16F" %}
8368   ins_encode %{
8369     int vector_len = 2;
8370     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8371   %}
8372   ins_pipe( pipe_slow );
8373 %}
8374 
8375 // Doubles vector mul
8376 instruct vmul2D(vecX dst, vecX src) %{
8377   predicate(n->as_Vector()->length() == 2);
8378   match(Set dst (MulVD dst src));
8379   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
8380   ins_encode %{
8381     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
8382   %}
8383   ins_pipe( pipe_slow );
8384 %}
8385 
8386 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
8387   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8388   match(Set dst (MulVD src1 src2));
8389   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
8390   ins_encode %{
8391     int vector_len = 0;
8392     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8393   %}
8394   ins_pipe( pipe_slow );
8395 %}
8396 
8397 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
8398   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8399   match(Set dst (MulVD src (LoadVector mem)));
8400   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
8401   ins_encode %{
8402     int vector_len = 0;
8403     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8404   %}
8405   ins_pipe( pipe_slow );
8406 %}
8407 
8408 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
8409   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8410   match(Set dst (MulVD src1 src2));
8411   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
8412   ins_encode %{
8413     int vector_len = 1;
8414     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8415   %}
8416   ins_pipe( pipe_slow );
8417 %}
8418 
8419 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
8420   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8421   match(Set dst (MulVD src (LoadVector mem)));
8422   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
8423   ins_encode %{
8424     int vector_len = 1;
8425     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8426   %}
8427   ins_pipe( pipe_slow );
8428 %}
8429 
8430 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
8431   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8432   match(Set dst (MulVD src1 src2));
8433   format %{ "vmulpd  $dst k0,$src1,$src2\t! mul packed8D" %}
8434   ins_encode %{
8435     int vector_len = 2;
8436     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8437   %}
8438   ins_pipe( pipe_slow );
8439 %}
8440 
8441 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
8442   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8443   match(Set dst (MulVD src (LoadVector mem)));
8444   format %{ "vmulpd  $dst k0,$src,$mem\t! mul packed8D" %}
8445   ins_encode %{
8446     int vector_len = 2;
8447     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8448   %}
8449   ins_pipe( pipe_slow );
8450 %}
8451 
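     // Vector conditional move of doubles: cmppd leaves an all-ones/all-zeros
     // mask in each 64-bit lane of $dst, and vblendvpd then takes each lane from
     // $src2 where the mask is set and from $src1 where it is clear, e.g.
     //   vcmpltpd  ymm0, ymm1, ymm2       (cond = lt)
     //   vblendvpd ymm0, ymm1, ymm2, ymm0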
8452 instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
8453   predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
8454   match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
8455   effect(TEMP dst, USE src1, USE src2);
8456   format %{ "cmppd.$copnd  $dst, $src1, $src2  ! vcmovevd, cond=$cop\n\t"
8457             "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
8458          %}
8459   ins_encode %{
8460     int vector_len = 1;
8461     int cond = (Assembler::Condition)($copnd$$cmpcode);
8462     __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
8463     __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
8464   %}
8465   ins_pipe( pipe_slow );
8466 %}
8467 
8468 // --------------------------------- DIV --------------------------------------
8469 
8470 // Floats vector div
8471 instruct vdiv2F(vecD dst, vecD src) %{
8472   predicate(n->as_Vector()->length() == 2);
8473   match(Set dst (DivVF dst src));
8474   format %{ "divps   $dst,$src\t! div packed2F" %}
8475   ins_encode %{
8476     __ divps($dst$$XMMRegister, $src$$XMMRegister);
8477   %}
8478   ins_pipe( pipe_slow );
8479 %}
8480 
8481 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
8482   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8483   match(Set dst (DivVF src1 src2));
8484   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
8485   ins_encode %{
8486     int vector_len = 0;
8487     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8488   %}
8489   ins_pipe( pipe_slow );
8490 %}
8491 
8492 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
8493   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8494   match(Set dst (DivVF src (LoadVector mem)));
8495   format %{ "vdivps  $dst,$src,$mem\t! div packed2F" %}
8496   ins_encode %{
8497     int vector_len = 0;
8498     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8499   %}
8500   ins_pipe( pipe_slow );
8501 %}
8502 
8503 instruct vdiv4F(vecX dst, vecX src) %{
8504   predicate(n->as_Vector()->length() == 4);
8505   match(Set dst (DivVF dst src));
8506   format %{ "divps   $dst,$src\t! div packed4F" %}
8507   ins_encode %{
8508     __ divps($dst$$XMMRegister, $src$$XMMRegister);
8509   %}
8510   ins_pipe( pipe_slow );
8511 %}
8512 
8513 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
8514   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8515   match(Set dst (DivVF src1 src2));
8516   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
8517   ins_encode %{
8518     int vector_len = 0;
8519     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8520   %}
8521   ins_pipe( pipe_slow );
8522 %}
8523 
8524 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
8525   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8526   match(Set dst (DivVF src (LoadVector mem)));
8527   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
8528   ins_encode %{
8529     int vector_len = 0;
8530     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8531   %}
8532   ins_pipe( pipe_slow );
8533 %}
8534 
8535 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
8536   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8537   match(Set dst (DivVF src1 src2));
8538   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
8539   ins_encode %{
8540     int vector_len = 1;
8541     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8542   %}
8543   ins_pipe( pipe_slow );
8544 %}
8545 
8546 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
8547   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8548   match(Set dst (DivVF src (LoadVector mem)));
8549   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
8550   ins_encode %{
8551     int vector_len = 1;
8552     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8553   %}
8554   ins_pipe( pipe_slow );
8555 %}
8556 
8557 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
8558   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8559   match(Set dst (DivVF src1 src2));
8560   format %{ "vdivps  $dst,$src1,$src2\t! div packed16F" %}
8561   ins_encode %{
8562     int vector_len = 2;
8563     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8564   %}
8565   ins_pipe( pipe_slow );
8566 %}
8567 
8568 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
8569   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8570   match(Set dst (DivVF src (LoadVector mem)));
8571   format %{ "vdivps  $dst,$src,$mem\t! div packed16F" %}
8572   ins_encode %{
8573     int vector_len = 2;
8574     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8575   %}
8576   ins_pipe( pipe_slow );
8577 %}
8578 
8579 // Doubles vector div
8580 instruct vdiv2D(vecX dst, vecX src) %{
8581   predicate(n->as_Vector()->length() == 2);
8582   match(Set dst (DivVD dst src));
8583   format %{ "divpd   $dst,$src\t! div packed2D" %}
8584   ins_encode %{
8585     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
8586   %}
8587   ins_pipe( pipe_slow );
8588 %}
8589 
8590 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
8591   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8592   match(Set dst (DivVD src1 src2));
8593   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
8594   ins_encode %{
8595     int vector_len = 0;
8596     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8597   %}
8598   ins_pipe( pipe_slow );
8599 %}
8600 
8601 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
8602   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8603   match(Set dst (DivVD src (LoadVector mem)));
8604   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
8605   ins_encode %{
8606     int vector_len = 0;
8607     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8608   %}
8609   ins_pipe( pipe_slow );
8610 %}
8611 
8612 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
8613   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8614   match(Set dst (DivVD src1 src2));
8615   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
8616   ins_encode %{
8617     int vector_len = 1;
8618     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8619   %}
8620   ins_pipe( pipe_slow );
8621 %}
8622 
8623 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
8624   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8625   match(Set dst (DivVD src (LoadVector mem)));
8626   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
8627   ins_encode %{
8628     int vector_len = 1;
8629     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8630   %}
8631   ins_pipe( pipe_slow );
8632 %}
8633 
8634 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
8635   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8636   match(Set dst (DivVD src1 src2));
8637   format %{ "vdivpd  $dst,$src1,$src2\t! div packed8D" %}
8638   ins_encode %{
8639     int vector_len = 2;
8640     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8641   %}
8642   ins_pipe( pipe_slow );
8643 %}
8644 
8645 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
8646   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8647   match(Set dst (DivVD src (LoadVector mem)));
8648   format %{ "vdivpd  $dst,$src,$mem\t! div packed8D" %}
8649   ins_encode %{
8650     int vector_len = 2;
8651     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8652   %}
8653   ins_pipe( pipe_slow );
8654 %}
8655 
8656 // ------------------------------ Shift ---------------------------------------
8657 
8658 // Left and right shift count vectors are the same on x86
8659 // (only lowest bits of xmm reg are used for count).
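     // The rule below therefore matches both LShiftCntV and RShiftCntV: movdl
     // moves the 32-bit count into the low doubleword of the xmm register, which
     // is where the packed shift instructions read it for either direction.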
8660 instruct vshiftcnt(vecS dst, rRegI cnt) %{
8661   match(Set dst (LShiftCntV cnt));
8662   match(Set dst (RShiftCntV cnt));
8663   format %{ "movd    $dst,$cnt\t! load shift count" %}
8664   ins_encode %{
8665     __ movdl($dst$$XMMRegister, $cnt$$Register);
8666   %}
8667   ins_pipe( pipe_slow );
8668 %}
8669 
8670 // --------------------------------- Sqrt --------------------------------------
8671 
8672 // Floating point vector sqrt - double precision only
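     // Only SqrtVD is matched; each size (2, 4 and 8 doubles) has a register and
     // a memory-operand form, all emitted as vsqrtpd with the matching vector
     // length.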
8673 instruct vsqrt2D_reg(vecX dst, vecX src) %{
8674   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8675   match(Set dst (SqrtVD src));
8676   format %{ "vsqrtpd  $dst,$src\t! sqrt packed2D" %}
8677   ins_encode %{
8678     int vector_len = 0;
8679     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
8680   %}
8681   ins_pipe( pipe_slow );
8682 %}
8683 
8684 instruct vsqrt2D_mem(vecX dst, memory mem) %{
8685   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8686   match(Set dst (SqrtVD (LoadVector mem)));
8687   format %{ "vsqrtpd  $dst,$mem\t! sqrt packed2D" %}
8688   ins_encode %{
8689     int vector_len = 0;
8690     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
8691   %}
8692   ins_pipe( pipe_slow );
8693 %}
8694 
8695 instruct vsqrt4D_reg(vecY dst, vecY src) %{
8696   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8697   match(Set dst (SqrtVD src));
8698   format %{ "vsqrtpd  $dst,$src\t! sqrt packed4D" %}
8699   ins_encode %{
8700     int vector_len = 1;
8701     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
8702   %}
8703   ins_pipe( pipe_slow );
8704 %}
8705 
8706 instruct vsqrt4D_mem(vecY dst, memory mem) %{
8707   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8708   match(Set dst (SqrtVD (LoadVector mem)));
8709   format %{ "vsqrtpd  $dst,$mem\t! sqrt packed4D" %}
8710   ins_encode %{
8711     int vector_len = 1;
8712     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
8713   %}
8714   ins_pipe( pipe_slow );
8715 %}
8716 
8717 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
8718   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8719   match(Set dst (SqrtVD src));
8720   format %{ "vsqrtpd  $dst,$src\t! sqrt packed8D" %}
8721   ins_encode %{
8722     int vector_len = 2;
8723     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
8724   %}
8725   ins_pipe( pipe_slow );
8726 %}
8727 
8728 instruct vsqrt8D_mem(vecZ dst, memory mem) %{
8729   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8730   match(Set dst (SqrtVD (LoadVector mem)));
8731   format %{ "vsqrtpd  $dst,$mem\t! sqrt packed8D" %}
8732   ins_encode %{
8733     int vector_len = 2;
8734     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
8735   %}
8736   ins_pipe( pipe_slow );
8737 %}
8738 
8739 // ------------------------------ LeftShift -----------------------------------
8740 
8741 // Shorts/Chars vector left shift
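     // As with the short/char multiplies above, most sizes below come in avx,
     // evex and evex_special variants, and each variant has two count forms: one
     // taking the shift count from an xmm register (vecS shift) and one taking an
     // 8-bit immediate (immI8 shift).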
8742 instruct vsll2S(vecS dst, vecS shift) %{
8743   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8744   match(Set dst (LShiftVS dst shift));
8745   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
8746   ins_encode %{
8747     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
8748   %}
8749   ins_pipe( pipe_slow );
8750 %}
8751 
8752 instruct vsll2S_imm(vecS dst, immI8 shift) %{
8753   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8754   match(Set dst (LShiftVS dst shift));
8755   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
8756   ins_encode %{
8757     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
8758   %}
8759   ins_pipe( pipe_slow );
8760 %}
8761 
8762 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
8763   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
8764   match(Set dst (LShiftVS src shift));
8765   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
8766   ins_encode %{
8767     int vector_len = 0;
8768     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8769   %}
8770   ins_pipe( pipe_slow );
8771 %}
8772 
8773 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
8774   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
8775   match(Set dst (LShiftVS src shift));
8776   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
8777   ins_encode %{
8778     int vector_len = 0;
8779     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8780   %}
8781   ins_pipe( pipe_slow );
8782 %}
8783 
8784 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
8785   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
8786   match(Set dst (LShiftVS dst shift));
8787   effect(TEMP src);
8788   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
8789   ins_encode %{
8790     int vector_len = 0;
8791     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8792   %}
8793   ins_pipe( pipe_slow );
8794 %}
8795 
8796 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
8797   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
8798   match(Set dst (LShiftVS src shift));
8799   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
8800   ins_encode %{
8801     int vector_len = 0;
8802     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8803   %}
8804   ins_pipe( pipe_slow );
8805 %}
8806 
8807 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
8808   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
8809   match(Set dst (LShiftVS src shift));
8810   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
8811   ins_encode %{
8812     int vector_len = 0;
8813     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8814   %}
8815   ins_pipe( pipe_slow );
8816 %}
8817 
8818 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
8819   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
8820   match(Set dst (LShiftVS dst shift));
8821   effect(TEMP src);
8822   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
8823   ins_encode %{
8824     int vector_len = 0;
8825     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8826   %}
8827   ins_pipe( pipe_slow );
8828 %}
8829 
8830 instruct vsll4S(vecD dst, vecS shift) %{
8831   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8832   match(Set dst (LShiftVS dst shift));
8833   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
8834   ins_encode %{
8835     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
8836   %}
8837   ins_pipe( pipe_slow );
8838 %}
8839 
8840 instruct vsll4S_imm(vecD dst, immI8 shift) %{
8841   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8842   match(Set dst (LShiftVS dst shift));
8843   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
8844   ins_encode %{
8845     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
8846   %}
8847   ins_pipe( pipe_slow );
8848 %}
8849 
8850 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
8851   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
8852   match(Set dst (LShiftVS src shift));
8853   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
8854   ins_encode %{
8855     int vector_len = 0;
8856     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8857   %}
8858   ins_pipe( pipe_slow );
8859 %}
8860 
8861 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
8862   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
8863   match(Set dst (LShiftVS src shift));
8864   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
8865   ins_encode %{
8866     int vector_len = 0;
8867     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8868   %}
8869   ins_pipe( pipe_slow );
8870 %}
8871 
8872 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
8873   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
8874   match(Set dst (LShiftVS dst shift));
8875   effect(TEMP src);
8876   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
8877   ins_encode %{
8878     int vector_len = 0;
8879     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8880   %}
8881   ins_pipe( pipe_slow );
8882 %}
8883 
8884 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
8885   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
8886   match(Set dst (LShiftVS src shift));
8887   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
8888   ins_encode %{
8889     int vector_len = 0;
8890     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8891   %}
8892   ins_pipe( pipe_slow );
8893 %}
8894 
8895 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
8896   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
8897   match(Set dst (LShiftVS src shift));
8898   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
8899   ins_encode %{
8900     int vector_len = 0;
8901     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8902   %}
8903   ins_pipe( pipe_slow );
8904 %}
8905 
8906 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
8907   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
8908   match(Set dst (LShiftVS dst shift));
8909   effect(TEMP src);
8910   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
8911   ins_encode %{
8912     int vector_len = 0;
8913     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8914   %}
8915   ins_pipe( pipe_slow );
8916 %}
8917 
8918 instruct vsll8S(vecX dst, vecS shift) %{
8919   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
8920   match(Set dst (LShiftVS dst shift));
8921   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
8922   ins_encode %{
8923     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
8924   %}
8925   ins_pipe( pipe_slow );
8926 %}
8927 
8928 instruct vsll8S_imm(vecX dst, immI8 shift) %{
8929   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
8930   match(Set dst (LShiftVS dst shift));
8931   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
8932   ins_encode %{
8933     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
8934   %}
8935   ins_pipe( pipe_slow );
8936 %}
8937 
8938 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
8939   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
8940   match(Set dst (LShiftVS src shift));
8941   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
8942   ins_encode %{
8943     int vector_len = 0;
8944     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8945   %}
8946   ins_pipe( pipe_slow );
8947 %}
8948 
8949 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
8950   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
8951   match(Set dst (LShiftVS src shift));
8952   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
8953   ins_encode %{
8954     int vector_len = 0;
8955     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8956   %}
8957   ins_pipe( pipe_slow );
8958 %}
8959 
8960 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
8961   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
8962   match(Set dst (LShiftVS dst shift));
8963   effect(TEMP src);
8964   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
8965   ins_encode %{
8966     int vector_len = 0;
8967     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8968   %}
8969   ins_pipe( pipe_slow );
8970 %}
8971 
8972 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
8973   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
8974   match(Set dst (LShiftVS src shift));
8975   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
8976   ins_encode %{
8977     int vector_len = 0;
8978     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8979   %}
8980   ins_pipe( pipe_slow );
8981 %}
8982 
8983 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
8984   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
8985   match(Set dst (LShiftVS src shift));
8986   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
8987   ins_encode %{
8988     int vector_len = 0;
8989     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8990   %}
8991   ins_pipe( pipe_slow );
8992 %}
8993 
8994 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
8995   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
8996   match(Set dst (LShiftVS dst shift));
8997   effect(TEMP src);
8998   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
8999   ins_encode %{
9000     int vector_len = 0;
9001     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9002   %}
9003   ins_pipe( pipe_slow );
9004 %}
9005 
9006 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
9007   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
9008   match(Set dst (LShiftVS src shift));
9009   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
9010   ins_encode %{
9011     int vector_len = 1;
9012     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9013   %}
9014   ins_pipe( pipe_slow );
9015 %}
9016 
9017 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
9018   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
9019   match(Set dst (LShiftVS src shift));
9020   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
9021   ins_encode %{
9022     int vector_len = 1;
9023     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9024   %}
9025   ins_pipe( pipe_slow );
9026 %}
9027 
9028 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
9029   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
9030   match(Set dst (LShiftVS dst shift));
9031   effect(TEMP src);
9032   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
9033   ins_encode %{
9034     int vector_len = 1;
9035     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9036   %}
9037   ins_pipe( pipe_slow );
9038 %}
9039 
9040 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
9041   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
9042   match(Set dst (LShiftVS src shift));
9043   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
9044   ins_encode %{
9045     int vector_len = 1;
9046     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9047   %}
9048   ins_pipe( pipe_slow );
9049 %}
9050 
9051 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
9052   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
9053   match(Set dst (LShiftVS src shift));
9054   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
9055   ins_encode %{
9056     int vector_len = 1;
9057     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9058   %}
9059   ins_pipe( pipe_slow );
9060 %}
9061 
9062 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
9063   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
9064   match(Set dst (LShiftVS dst shift));
9065   effect(TEMP src);
9066   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
9067   ins_encode %{
9068     int vector_len = 1;
9069     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9070   %}
9071   ins_pipe( pipe_slow );
9072 %}
9073 
9074 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
9075   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
9076   match(Set dst (LShiftVS src shift));
9077   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
9078   ins_encode %{
9079     int vector_len = 2;
9080     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9081   %}
9082   ins_pipe( pipe_slow );
9083 %}
9084 
9085 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
9086   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
9087   match(Set dst (LShiftVS src shift));
9088   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
9089   ins_encode %{
9090     int vector_len = 2;
9091     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9092   %}
9093   ins_pipe( pipe_slow );
9094 %}
9095 
9096 // Integers vector left shift
9097 instruct vsll2I(vecD dst, vecS shift) %{
9098   predicate(n->as_Vector()->length() == 2);
9099   match(Set dst (LShiftVI dst shift));
9100   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
9101   ins_encode %{
9102     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
9103   %}
9104   ins_pipe( pipe_slow );
9105 %}
9106 
9107 instruct vsll2I_imm(vecD dst, immI8 shift) %{
9108   predicate(n->as_Vector()->length() == 2);
9109   match(Set dst (LShiftVI dst shift));
9110   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
9111   ins_encode %{
9112     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
9113   %}
9114   ins_pipe( pipe_slow );
9115 %}
9116 
9117 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
9118   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9119   match(Set dst (LShiftVI src shift));
9120   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
9121   ins_encode %{
9122     int vector_len = 0;
9123     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9124   %}
9125   ins_pipe( pipe_slow );
9126 %}
9127 
9128 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
9129   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9130   match(Set dst (LShiftVI src shift));
9131   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
9132   ins_encode %{
9133     int vector_len = 0;
9134     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9135   %}
9136   ins_pipe( pipe_slow );
9137 %}
9138 
9139 instruct vsll4I(vecX dst, vecS shift) %{
9140   predicate(n->as_Vector()->length() == 4);
9141   match(Set dst (LShiftVI dst shift));
9142   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
9143   ins_encode %{
9144     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
9145   %}
9146   ins_pipe( pipe_slow );
9147 %}
9148 
9149 instruct vsll4I_imm(vecX dst, immI8 shift) %{
9150   predicate(n->as_Vector()->length() == 4);
9151   match(Set dst (LShiftVI dst shift));
9152   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
9153   ins_encode %{
9154     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
9155   %}
9156   ins_pipe( pipe_slow );
9157 %}
9158 
9159 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
9160   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
9161   match(Set dst (LShiftVI src shift));
9162   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
9163   ins_encode %{
9164     int vector_len = 0;
9165     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9166   %}
9167   ins_pipe( pipe_slow );
9168 %}
9169 
9170 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
9171   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
9172   match(Set dst (LShiftVI src shift));
9173   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
9174   ins_encode %{
9175     int vector_len = 0;
9176     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9177   %}
9178   ins_pipe( pipe_slow );
9179 %}
9180 
9181 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
9182   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
9183   match(Set dst (LShiftVI src shift));
9184   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
9185   ins_encode %{
9186     int vector_len = 1;
9187     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9188   %}
9189   ins_pipe( pipe_slow );
9190 %}
9191 
9192 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
9193   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
9194   match(Set dst (LShiftVI src shift));
9195   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
9196   ins_encode %{
9197     int vector_len = 1;
9198     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9199   %}
9200   ins_pipe( pipe_slow );
9201 %}
9202 
9203 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
9204   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
9205   match(Set dst (LShiftVI src shift));
9206   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
9207   ins_encode %{
9208     int vector_len = 2;
9209     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9210   %}
9211   ins_pipe( pipe_slow );
9212 %}
9213 
9214 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
9215   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
9216   match(Set dst (LShiftVI src shift));
9217   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
9218   ins_encode %{
9219     int vector_len = 2;
9220     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9221   %}
9222   ins_pipe( pipe_slow );
9223 %}
9224 
9225 // Longs vector left shift
9226 instruct vsll2L(vecX dst, vecS shift) %{
9227   predicate(n->as_Vector()->length() == 2);
9228   match(Set dst (LShiftVL dst shift));
9229   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
9230   ins_encode %{
9231     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
9232   %}
9233   ins_pipe( pipe_slow );
9234 %}
9235 
9236 instruct vsll2L_imm(vecX dst, immI8 shift) %{
9237   predicate(n->as_Vector()->length() == 2);
9238   match(Set dst (LShiftVL dst shift));
9239   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
9240   ins_encode %{
9241     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
9242   %}
9243   ins_pipe( pipe_slow );
9244 %}
9245 
9246 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
9247   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9248   match(Set dst (LShiftVL src shift));
9249   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
9250   ins_encode %{
9251     int vector_len = 0;
9252     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9253   %}
9254   ins_pipe( pipe_slow );
9255 %}
9256 
9257 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
9258   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9259   match(Set dst (LShiftVL src shift));
9260   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
9261   ins_encode %{
9262     int vector_len = 0;
9263     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9264   %}
9265   ins_pipe( pipe_slow );
9266 %}
9267 
9268 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
9269   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
9270   match(Set dst (LShiftVL src shift));
9271   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
9272   ins_encode %{
9273     int vector_len = 1;
9274     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9275   %}
9276   ins_pipe( pipe_slow );
9277 %}
9278 
9279 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
9280   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
9281   match(Set dst (LShiftVL src shift));
9282   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
9283   ins_encode %{
9284     int vector_len = 1;
9285     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9286   %}
9287   ins_pipe( pipe_slow );
9288 %}
9289 
9290 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
9291   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
9292   match(Set dst (LShiftVL src shift));
9293   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
9294   ins_encode %{
9295     int vector_len = 2;
9296     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9297   %}
9298   ins_pipe( pipe_slow );
9299 %}
9300 
9301 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
9302   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
9303   match(Set dst (LShiftVL src shift));
9304   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
9305   ins_encode %{
9306     int vector_len = 2;
9307     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9308   %}
9309   ins_pipe( pipe_slow );
9310 %}
9311 
9312 // ----------------------- LogicalRightShift -----------------------------------
9313 
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int with
// sign extension before the shift. But char vectors are fine since chars are
// unsigned values.
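//
// A minimal illustration of the difference in plain Java (not part of this
// descriptor), assuming a shift count of 4:
//
//   short s = -1;                 // lane bits 0xFFFF
//   short r = (short)(s >>> 4);   // Java sign-extends s to 0xFFFFFFFF, shifts to
//                                 // 0x0FFFFFFF, then narrows back to 0xFFFF (-1)
//   // a packed 16-bit psrlw on the same lane would produce 0x0FFF (4095) instead
//
//   char c = 0xFFFF;
//   char q = (char)(c >>> 4);     // zero-extends, giving 0x0FFF, which matches
//                                 // the packed psrlw result, so chars are safe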
9318 
9319 instruct vsrl2S(vecS dst, vecS shift) %{
9320   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
9321   match(Set dst (URShiftVS dst shift));
9322   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
9323   ins_encode %{
9324     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
9325   %}
9326   ins_pipe( pipe_slow );
9327 %}
9328 
9329 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
9330   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
9331   match(Set dst (URShiftVS dst shift));
9332   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
9333   ins_encode %{
9334     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
9335   %}
9336   ins_pipe( pipe_slow );
9337 %}
9338 
9339 instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
9340   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
9341   match(Set dst (URShiftVS src shift));
9342   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
9343   ins_encode %{
9344     int vector_len = 0;
9345     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9346   %}
9347   ins_pipe( pipe_slow );
9348 %}
9349 
9350 instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
9351   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
9352   match(Set dst (URShiftVS src shift));
9353   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
9354   ins_encode %{
9355     int vector_len = 0;
9356     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9357   %}
9358   ins_pipe( pipe_slow );
9359 %}
9360 
9361 instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
9362   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
9363   match(Set dst (URShiftVS dst shift));
9364   effect(TEMP src);
9365   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
9366   ins_encode %{
9367     int vector_len = 0;
9368     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9369   %}
9370   ins_pipe( pipe_slow );
9371 %}
9372 
9373 instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
9374   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
9375   match(Set dst (URShiftVS src shift));
9376   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
9377   ins_encode %{
9378     int vector_len = 0;
9379     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9380   %}
9381   ins_pipe( pipe_slow );
9382 %}
9383 
9384 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
9385   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
9386   match(Set dst (URShiftVS src shift));
9387   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
9388   ins_encode %{
9389     int vector_len = 0;
9390     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9391   %}
9392   ins_pipe( pipe_slow );
9393 %}
9394 
9395 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
9396   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
9397   match(Set dst (URShiftVS dst shift));
9398   effect(TEMP src);
9399   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
9400   ins_encode %{
9401     int vector_len = 0;
9402     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9403   %}
9404   ins_pipe( pipe_slow );
9405 %}
9406 
9407 instruct vsrl4S(vecD dst, vecS shift) %{
9408   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9409   match(Set dst (URShiftVS dst shift));
9410   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
9411   ins_encode %{
9412     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
9413   %}
9414   ins_pipe( pipe_slow );
9415 %}
9416 
9417 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
9418   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9419   match(Set dst (URShiftVS dst shift));
9420   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
9421   ins_encode %{
9422     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
9423   %}
9424   ins_pipe( pipe_slow );
9425 %}
9426 
9427 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
9428   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
9429   match(Set dst (URShiftVS src shift));
9430   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
9431   ins_encode %{
9432     int vector_len = 0;
9433     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9434   %}
9435   ins_pipe( pipe_slow );
9436 %}
9437 
9438 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
9439   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
9440   match(Set dst (URShiftVS src shift));
9441   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
9442   ins_encode %{
9443     int vector_len = 0;
9444     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9445   %}
9446   ins_pipe( pipe_slow );
9447 %}
9448 
9449 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
9450   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
9451   match(Set dst (URShiftVS dst shift));
9452   effect(TEMP src);
9453   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
9454   ins_encode %{
9455     int vector_len = 0;
9456     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9457   %}
9458   ins_pipe( pipe_slow );
9459 %}
9460 
9461 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
9462   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
9463   match(Set dst (URShiftVS src shift));
9464   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
9465   ins_encode %{
9466     int vector_len = 0;
9467     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9468   %}
9469   ins_pipe( pipe_slow );
9470 %}
9471 
9472 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
9473   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
9474   match(Set dst (URShiftVS src shift));
9475   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
9476   ins_encode %{
9477     int vector_len = 0;
9478     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9479   %}
9480   ins_pipe( pipe_slow );
9481 %}
9482 
9483 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
9484   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
9485   match(Set dst (URShiftVS dst shift));
9486   effect(TEMP src);
9487   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
9488   ins_encode %{
9489     int vector_len = 0;
9490     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9491   %}
9492   ins_pipe( pipe_slow );
9493 %}
9494 
9495 instruct vsrl8S(vecX dst, vecS shift) %{
9496   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
9497   match(Set dst (URShiftVS dst shift));
9498   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
9499   ins_encode %{
9500     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
9501   %}
9502   ins_pipe( pipe_slow );
9503 %}
9504 
9505 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
9506   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
9507   match(Set dst (URShiftVS dst shift));
9508   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
9509   ins_encode %{
9510     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
9511   %}
9512   ins_pipe( pipe_slow );
9513 %}
9514 
9515 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
9516   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
9517   match(Set dst (URShiftVS src shift));
9518   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
9519   ins_encode %{
9520     int vector_len = 0;
9521     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9522   %}
9523   ins_pipe( pipe_slow );
9524 %}
9525 
9526 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
9527   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
9528   match(Set dst (URShiftVS src shift));
9529   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
9530   ins_encode %{
9531     int vector_len = 0;
9532     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9533   %}
9534   ins_pipe( pipe_slow );
9535 %}
9536 
9537 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
9538   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
9539   match(Set dst (URShiftVS dst shift));
9540   effect(TEMP src);
9541   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
9542   ins_encode %{
9543     int vector_len = 0;
9544     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9545   %}
9546   ins_pipe( pipe_slow );
9547 %}
9548 
9549 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
9550   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
9551   match(Set dst (URShiftVS src shift));
9552   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
9553   ins_encode %{
9554     int vector_len = 0;
9555     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9556   %}
9557   ins_pipe( pipe_slow );
9558 %}
9559 
9560 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
9561   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
9562   match(Set dst (URShiftVS src shift));
9563   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
9564   ins_encode %{
9565     int vector_len = 0;
9566     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9567   %}
9568   ins_pipe( pipe_slow );
9569 %}
9570 
9571 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
9572   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
9573   match(Set dst (URShiftVS dst shift));
9574   effect(TEMP src);
9575   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
9576   ins_encode %{
9577     int vector_len = 0;
9578     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9579   %}
9580   ins_pipe( pipe_slow );
9581 %}
9582 
9583 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
9584   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
9585   match(Set dst (URShiftVS src shift));
9586   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
9587   ins_encode %{
9588     int vector_len = 1;
9589     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9590   %}
9591   ins_pipe( pipe_slow );
9592 %}
9593 
9594 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
9595   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
9596   match(Set dst (URShiftVS src shift));
9597   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
9598   ins_encode %{
9599     int vector_len = 1;
9600     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9601   %}
9602   ins_pipe( pipe_slow );
9603 %}
9604 
9605 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
9606   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
9607   match(Set dst (URShiftVS dst shift));
9608   effect(TEMP src);
9609   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
9610   ins_encode %{
9611     int vector_len = 1;
9612     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9613   %}
9614   ins_pipe( pipe_slow );
9615 %}
9616 
9617 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
9618   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
9619   match(Set dst (URShiftVS src shift));
9620   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
9621   ins_encode %{
9622     int vector_len = 1;
9623     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9624   %}
9625   ins_pipe( pipe_slow );
9626 %}
9627 
9628 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
9629   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
9630   match(Set dst (URShiftVS src shift));
9631   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
9632   ins_encode %{
9633     int vector_len = 1;
9634     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9635   %}
9636   ins_pipe( pipe_slow );
9637 %}
9638 
9639 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
9640   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
9641   match(Set dst (URShiftVS dst shift));
9642   effect(TEMP src);
9643   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
9644   ins_encode %{
9645     int vector_len = 1;
9646     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9647   %}
9648   ins_pipe( pipe_slow );
9649 %}
9650 
9651 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
9652   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
9653   match(Set dst (URShiftVS src shift));
9654   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
9655   ins_encode %{
9656     int vector_len = 2;
9657     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9658   %}
9659   ins_pipe( pipe_slow );
9660 %}
9661 
9662 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
9663   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
9664   match(Set dst (URShiftVS src shift));
9665   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
9666   ins_encode %{
9667     int vector_len = 2;
9668     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9669   %}
9670   ins_pipe( pipe_slow );
9671 %}
9672 
9673 // Integers vector logical right shift
9674 instruct vsrl2I(vecD dst, vecS shift) %{
9675   predicate(n->as_Vector()->length() == 2);
9676   match(Set dst (URShiftVI dst shift));
9677   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
9678   ins_encode %{
9679     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
9680   %}
9681   ins_pipe( pipe_slow );
9682 %}
9683 
9684 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
9685   predicate(n->as_Vector()->length() == 2);
9686   match(Set dst (URShiftVI dst shift));
9687   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
9688   ins_encode %{
9689     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
9690   %}
9691   ins_pipe( pipe_slow );
9692 %}
9693 
9694 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
9695   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9696   match(Set dst (URShiftVI src shift));
9697   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
9698   ins_encode %{
9699     int vector_len = 0;
9700     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9701   %}
9702   ins_pipe( pipe_slow );
9703 %}
9704 
9705 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
9706   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9707   match(Set dst (URShiftVI src shift));
9708   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
9709   ins_encode %{
9710     int vector_len = 0;
9711     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9712   %}
9713   ins_pipe( pipe_slow );
9714 %}
9715 
9716 instruct vsrl4I(vecX dst, vecS shift) %{
9717   predicate(n->as_Vector()->length() == 4);
9718   match(Set dst (URShiftVI dst shift));
9719   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
9720   ins_encode %{
9721     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
9722   %}
9723   ins_pipe( pipe_slow );
9724 %}
9725 
9726 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
9727   predicate(n->as_Vector()->length() == 4);
9728   match(Set dst (URShiftVI dst shift));
9729   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
9730   ins_encode %{
9731     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
9732   %}
9733   ins_pipe( pipe_slow );
9734 %}
9735 
9736 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
9737   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
9738   match(Set dst (URShiftVI src shift));
9739   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
9740   ins_encode %{
9741     int vector_len = 0;
9742     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9743   %}
9744   ins_pipe( pipe_slow );
9745 %}
9746 
9747 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
9748   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
9749   match(Set dst (URShiftVI src shift));
9750   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
9751   ins_encode %{
9752     int vector_len = 0;
9753     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9754   %}
9755   ins_pipe( pipe_slow );
9756 %}
9757 
9758 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
9759   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
9760   match(Set dst (URShiftVI src shift));
9761   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
9762   ins_encode %{
9763     int vector_len = 1;
9764     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9765   %}
9766   ins_pipe( pipe_slow );
9767 %}
9768 
9769 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
9770   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
9771   match(Set dst (URShiftVI src shift));
9772   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
9773   ins_encode %{
9774     int vector_len = 1;
9775     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9776   %}
9777   ins_pipe( pipe_slow );
9778 %}
9779 
9780 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
9781   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
9782   match(Set dst (URShiftVI src shift));
9783   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
9784   ins_encode %{
9785     int vector_len = 2;
9786     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9787   %}
9788   ins_pipe( pipe_slow );
9789 %}
9790 
9791 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
9792   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
9793   match(Set dst (URShiftVI src shift));
9794   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
9795   ins_encode %{
9796     int vector_len = 2;
9797     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9798   %}
9799   ins_pipe( pipe_slow );
9800 %}
9801 
9802 // Longs vector logical right shift
9803 instruct vsrl2L(vecX dst, vecS shift) %{
9804   predicate(n->as_Vector()->length() == 2);
9805   match(Set dst (URShiftVL dst shift));
9806   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
9807   ins_encode %{
9808     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
9809   %}
9810   ins_pipe( pipe_slow );
9811 %}
9812 
9813 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
9814   predicate(n->as_Vector()->length() == 2);
9815   match(Set dst (URShiftVL dst shift));
9816   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
9817   ins_encode %{
9818     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
9819   %}
9820   ins_pipe( pipe_slow );
9821 %}
9822 
9823 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
9824   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9825   match(Set dst (URShiftVL src shift));
9826   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
9827   ins_encode %{
9828     int vector_len = 0;
9829     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9830   %}
9831   ins_pipe( pipe_slow );
9832 %}
9833 
9834 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
9835   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9836   match(Set dst (URShiftVL src shift));
9837   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
9838   ins_encode %{
9839     int vector_len = 0;
9840     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9841   %}
9842   ins_pipe( pipe_slow );
9843 %}
9844 
9845 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
9846   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
9847   match(Set dst (URShiftVL src shift));
9848   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
9849   ins_encode %{
9850     int vector_len = 1;
9851     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9852   %}
9853   ins_pipe( pipe_slow );
9854 %}
9855 
9856 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
9857   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
9858   match(Set dst (URShiftVL src shift));
9859   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
9860   ins_encode %{
9861     int vector_len = 1;
9862     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9863   %}
9864   ins_pipe( pipe_slow );
9865 %}
9866 
9867 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
9868   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
9869   match(Set dst (URShiftVL src shift));
9870   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
9871   ins_encode %{
9872     int vector_len = 2;
9873     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9874   %}
9875   ins_pipe( pipe_slow );
9876 %}
9877 
9878 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
9879   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
9880   match(Set dst (URShiftVL src shift));
9881   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
9882   ins_encode %{
9883     int vector_len = 2;
9884     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9885   %}
9886   ins_pipe( pipe_slow );
9887 %}
9888 
9889 // ------------------- ArithmeticRightShift -----------------------------------
9890 
9891 // Shorts/Chars vector arithmetic right shift
9892 instruct vsra2S(vecS dst, vecS shift) %{
9893   predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
9894   match(Set dst (RShiftVS dst shift));
9895   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
9896   ins_encode %{
9897     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
9898   %}
9899   ins_pipe( pipe_slow );
9900 %}
9901 
9902 instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
9904   match(Set dst (RShiftVS dst shift));
9905   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
9906   ins_encode %{
9907     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
9908   %}
9909   ins_pipe( pipe_slow );
9910 %}
9911 
9912 instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
9913   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
9914   match(Set dst (RShiftVS src shift));
9915   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9916   ins_encode %{
9917     int vector_len = 0;
9918     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9919   %}
9920   ins_pipe( pipe_slow );
9921 %}
9922 
9923 instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
9924   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
9925   match(Set dst (RShiftVS src shift));
9926   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9927   ins_encode %{
9928     int vector_len = 0;
9929     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9930   %}
9931   ins_pipe( pipe_slow );
9932 %}
9933 
9934 instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
9935   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
9936   match(Set dst (RShiftVS dst shift));
9937   effect(TEMP src);
9938   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9939   ins_encode %{
9940     int vector_len = 0;
9941     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9942   %}
9943   ins_pipe( pipe_slow );
9944 %}
9945 
9946 instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
9947   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
9948   match(Set dst (RShiftVS src shift));
9949   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9950   ins_encode %{
9951     int vector_len = 0;
9952     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9953   %}
9954   ins_pipe( pipe_slow );
9955 %}
9956 
9957 instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
9958   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
9959   match(Set dst (RShiftVS src shift));
9960   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9961   ins_encode %{
9962     int vector_len = 0;
9963     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9964   %}
9965   ins_pipe( pipe_slow );
9966 %}
9967 
9968 instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
9969   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
9970   match(Set dst (RShiftVS dst shift));
9971   effect(TEMP src);
9972   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9973   ins_encode %{
9974     int vector_len = 0;
9975     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9976   %}
9977   ins_pipe( pipe_slow );
9978 %}
9979 
9980 instruct vsra4S(vecD dst, vecS shift) %{
9981   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9982   match(Set dst (RShiftVS dst shift));
9983   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
9984   ins_encode %{
9985     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
9986   %}
9987   ins_pipe( pipe_slow );
9988 %}
9989 
9990 instruct vsra4S_imm(vecD dst, immI8 shift) %{
9991   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9992   match(Set dst (RShiftVS dst shift));
9993   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
9994   ins_encode %{
9995     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
9996   %}
9997   ins_pipe( pipe_slow );
9998 %}
9999 
10000 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
10001   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
10002   match(Set dst (RShiftVS src shift));
10003   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
10004   ins_encode %{
10005     int vector_len = 0;
10006     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10007   %}
10008   ins_pipe( pipe_slow );
10009 %}
10010 
10011 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
10012   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
10013   match(Set dst (RShiftVS src shift));
10014   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
10015   ins_encode %{
10016     int vector_len = 0;
10017     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10018   %}
10019   ins_pipe( pipe_slow );
10020 %}
10021 
10022 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
10023   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
10024   match(Set dst (RShiftVS dst shift));
10025   effect(TEMP src);
10026   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
10027   ins_encode %{
10028     int vector_len = 0;
10029     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10030   %}
10031   ins_pipe( pipe_slow );
10032 %}
10033 
10034 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
10035   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
10036   match(Set dst (RShiftVS src shift));
10037   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
10038   ins_encode %{
10039     int vector_len = 0;
10040     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10041   %}
10042   ins_pipe( pipe_slow );
10043 %}
10044 
10045 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
10046   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
10047   match(Set dst (RShiftVS src shift));
10048   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
10049   ins_encode %{
10050     int vector_len = 0;
10051     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10052   %}
10053   ins_pipe( pipe_slow );
10054 %}
10055 
10056 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
10057   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
10058   match(Set dst (RShiftVS dst shift));
10059   effect(TEMP src);
10060   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
10061   ins_encode %{
10062     int vector_len = 0;
10063     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10064   %}
10065   ins_pipe( pipe_slow );
10066 %}
10067 
10068 instruct vsra8S(vecX dst, vecS shift) %{
10069   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
10070   match(Set dst (RShiftVS dst shift));
10071   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
10072   ins_encode %{
10073     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
10074   %}
10075   ins_pipe( pipe_slow );
10076 %}
10077 
10078 instruct vsra8S_imm(vecX dst, immI8 shift) %{
10079   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
10080   match(Set dst (RShiftVS dst shift));
10081   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
10082   ins_encode %{
10083     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
10084   %}
10085   ins_pipe( pipe_slow );
10086 %}
10087 
10088 instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
10089   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
10090   match(Set dst (RShiftVS src shift));
10091   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
10092   ins_encode %{
10093     int vector_len = 0;
10094     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10095   %}
10096   ins_pipe( pipe_slow );
10097 %}
10098 
10099 instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
10100   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
10101   match(Set dst (RShiftVS src shift));
10102   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
10103   ins_encode %{
10104     int vector_len = 0;
10105     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10106   %}
10107   ins_pipe( pipe_slow );
10108 %}
10109 
10110 instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
10111   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
10112   match(Set dst (RShiftVS dst shift));
10113   effect(TEMP src);
10114   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
10115   ins_encode %{
10116     int vector_len = 0;
10117     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10118   %}
10119   ins_pipe( pipe_slow );
10120 %}
10121 
10122 instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
10123   predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
10124   match(Set dst (RShiftVS src shift));
10125   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
10126   ins_encode %{
10127     int vector_len = 0;
10128     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10129   %}
10130   ins_pipe( pipe_slow );
10131 %}
10132 
10133 instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
10134   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
10135   match(Set dst (RShiftVS src shift));
10136   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
10137   ins_encode %{
10138     int vector_len = 0;
10139     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10140   %}
10141   ins_pipe( pipe_slow );
10142 %}
10143 
10144 instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
10145   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
10146   match(Set dst (RShiftVS dst shift));
10147   effect(TEMP src);
10148   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
10149   ins_encode %{
10150     int vector_len = 0;
10151     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10152   %}
10153   ins_pipe( pipe_slow );
10154 %}
10155 
10156 instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
10157   predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
10158   match(Set dst (RShiftVS src shift));
10159   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10160   ins_encode %{
10161     int vector_len = 1;
10162     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10163   %}
10164   ins_pipe( pipe_slow );
10165 %}
10166 
10167 instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
10168   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
10169   match(Set dst (RShiftVS src shift));
10170   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10171   ins_encode %{
10172     int vector_len = 1;
10173     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10174   %}
10175   ins_pipe( pipe_slow );
10176 %}
10177 
10178 instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
10179   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
10180   match(Set dst (RShiftVS dst shift));
10181   effect(TEMP src);
10182   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10183   ins_encode %{
10184     int vector_len = 1;
10185     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10186   %}
10187   ins_pipe( pipe_slow );
10188 %}
10189 
10190 instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
10192   match(Set dst (RShiftVS src shift));
10193   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10194   ins_encode %{
10195     int vector_len = 1;
10196     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10197   %}
10198   ins_pipe( pipe_slow );
10199 %}
10200 
10201 instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
10202   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
10203   match(Set dst (RShiftVS src shift));
10204   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10205   ins_encode %{
10206     int vector_len = 1;
10207     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10208   %}
10209   ins_pipe( pipe_slow );
10210 %}
10211 
10212 instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
10213   predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
10214   match(Set dst (RShiftVS dst shift));
10215   effect(TEMP src);
10216   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
10217   ins_encode %{
10218     int vector_len = 1;
10219     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10220   %}
10221   ins_pipe( pipe_slow );
10222 %}
10223 
10224 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
10225   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
10226   match(Set dst (RShiftVS src shift));
10227   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
10228   ins_encode %{
10229     int vector_len = 2;
10230     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10231   %}
10232   ins_pipe( pipe_slow );
10233 %}
10234 
10235 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
10236   predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
10237   match(Set dst (RShiftVS src shift));
10238   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
10239   ins_encode %{
10240     int vector_len = 2;
10241     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10242   %}
10243   ins_pipe( pipe_slow );
10244 %}
10245 
10246 // Integers vector arithmetic right shift
10247 instruct vsra2I(vecD dst, vecS shift) %{
10248   predicate(n->as_Vector()->length() == 2);
10249   match(Set dst (RShiftVI dst shift));
10250   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
10251   ins_encode %{
10252     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
10253   %}
10254   ins_pipe( pipe_slow );
10255 %}
10256 
10257 instruct vsra2I_imm(vecD dst, immI8 shift) %{
10258   predicate(n->as_Vector()->length() == 2);
10259   match(Set dst (RShiftVI dst shift));
10260   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
10261   ins_encode %{
10262     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
10263   %}
10264   ins_pipe( pipe_slow );
10265 %}
10266 
10267 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
10268   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
10269   match(Set dst (RShiftVI src shift));
10270   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
10271   ins_encode %{
10272     int vector_len = 0;
10273     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10274   %}
10275   ins_pipe( pipe_slow );
10276 %}
10277 
10278 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
10279   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
10280   match(Set dst (RShiftVI src shift));
10281   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
10282   ins_encode %{
10283     int vector_len = 0;
10284     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10285   %}
10286   ins_pipe( pipe_slow );
10287 %}
10288 
10289 instruct vsra4I(vecX dst, vecS shift) %{
10290   predicate(n->as_Vector()->length() == 4);
10291   match(Set dst (RShiftVI dst shift));
10292   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
10293   ins_encode %{
10294     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
10295   %}
10296   ins_pipe( pipe_slow );
10297 %}
10298 
10299 instruct vsra4I_imm(vecX dst, immI8 shift) %{
10300   predicate(n->as_Vector()->length() == 4);
10301   match(Set dst (RShiftVI dst shift));
10302   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
10303   ins_encode %{
10304     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
10305   %}
10306   ins_pipe( pipe_slow );
10307 %}
10308 
10309 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
10310   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
10311   match(Set dst (RShiftVI src shift));
10312   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
10313   ins_encode %{
10314     int vector_len = 0;
10315     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10316   %}
10317   ins_pipe( pipe_slow );
10318 %}
10319 
10320 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
10321   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
10322   match(Set dst (RShiftVI src shift));
10323   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
10324   ins_encode %{
10325     int vector_len = 0;
10326     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10327   %}
10328   ins_pipe( pipe_slow );
10329 %}
10330 
10331 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
10332   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
10333   match(Set dst (RShiftVI src shift));
10334   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
10335   ins_encode %{
10336     int vector_len = 1;
10337     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10338   %}
10339   ins_pipe( pipe_slow );
10340 %}
10341 
10342 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
10343   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
10344   match(Set dst (RShiftVI src shift));
10345   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
10346   ins_encode %{
10347     int vector_len = 1;
10348     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10349   %}
10350   ins_pipe( pipe_slow );
10351 %}
10352 
10353 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
10354   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
10355   match(Set dst (RShiftVI src shift));
10356   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
10357   ins_encode %{
10358     int vector_len = 2;
10359     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
10360   %}
10361   ins_pipe( pipe_slow );
10362 %}
10363 
10364 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
10365   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
10366   match(Set dst (RShiftVI src shift));
10367   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
10368   ins_encode %{
10369     int vector_len = 2;
10370     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
10371   %}
10372   ins_pipe( pipe_slow );
10373 %}
10374 
// There are no vector arithmetic right shift instructions for longs: this file does
// not match RShiftVL, since x86 has no packed 64-bit arithmetic right shift before
// AVX-512's vpsraq.
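//
// For reference only, a sketch in plain Java (not part of this descriptor) showing
// that such a shift could in principle be rebuilt from the logical shifts above;
// the asr() helper is purely illustrative and assumes 0 < n < 64:
//
//   static long asr(long x, int n) {
//     return (x >>> n) | (-(x >>> 63) << (64 - n));   // OR the sign bits back in
//   }
//
// C2 does not do this here; long '>>' loops are simply left to scalar code.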
10376 
10377 
10378 // --------------------------------- AND --------------------------------------
10379 
10380 instruct vand4B(vecS dst, vecS src) %{
10381   predicate(n->as_Vector()->length_in_bytes() == 4);
10382   match(Set dst (AndV dst src));
10383   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
10384   ins_encode %{
10385     __ pand($dst$$XMMRegister, $src$$XMMRegister);
10386   %}
10387   ins_pipe( pipe_slow );
10388 %}
10389 
10390 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
10391   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
10392   match(Set dst (AndV src1 src2));
10393   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
10394   ins_encode %{
10395     int vector_len = 0;
10396     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
10397   %}
10398   ins_pipe( pipe_slow );
10399 %}
10400 
10401 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
10402   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
10403   match(Set dst (AndV src (LoadVector mem)));
10404   format %{ "vpand   $dst,$src,$mem\t! and vectors (4 bytes)" %}
10405   ins_encode %{
10406     int vector_len = 0;
10407     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
10408   %}
10409   ins_pipe( pipe_slow );
10410 %}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------
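//
// As with AND above, each vector width has a legacy two-operand form (por,
// which destroys dst) plus AVX three-operand forms taking a register or
// memory second source; 32-byte forms require AVX2 (UseAVX > 1) and 64-byte
// forms require AVX-512 (UseAVX > 2).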

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------
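//
// Same pattern again: pxor for the non-AVX two-operand form, vpxor for the
// AVX register and memory forms, with vector_len selecting the 128-, 256- or
// 512-bit encoding.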

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}