1 //
   2 // Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Common Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding, concrete VMReg );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
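//
// For illustration (taken from the first definition below), one 32-bit slot of
// an XMM register is declared as
//      reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// i.e. save-on-call for both the allocator and the C calling convention,
// spilled as a float (Op_RegF), encoding 0, backed by xmm0's VMReg; the
// remaining slots of the same register are reached via ->next(1) .. ->next(15).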
  61 
  62 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  63 // Word a in each register holds a Float, words ab hold a Double.
  64 // The whole registers are used in SSE4.2 version intrinsics,
  65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  66 // UseXMMForArrayCopy and UseSuperword flags).
  67 // For pre-EVEX architectures:
  68 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
  69 // For EVEX-enabled architectures:
  70 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  71 //
  72 // Linux ABI:   No register preserved across function calls
  73 //              XMM0-XMM7 might hold parameters
  74 // Windows ABI: XMM6-XMM31 preserved across function calls
  75 //              XMM0-XMM3 might hold parameters
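//
// For example, XMM6 below is ( SOC, SOE, ... ) under _WIN64, since the Windows
// ABI treats it as callee-saved, but ( SOC, SOC, ... ) on the other platforms,
// where it is volatile.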
  76 
  77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  93 
  94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
 110 
 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
 127 
 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
 144 
 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
 161 
 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
 178 
 179 #ifdef _WIN64
 180 
 181 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
 182 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 183 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 184 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 185 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 186 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 187 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 188 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 189 reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 190 reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 191 reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 192 reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 193 reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 194 reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 195 reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 196 reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 197 
 198 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
 199 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 200 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 201 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 202 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 203 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 204 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 205 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 206 reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 207 reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 208 reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 209 reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 210 reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 211 reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 212 reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 213 reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 214 
 215 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
 216 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 217 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 218 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 219 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 220 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 221 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 222 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 223 reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 224 reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 225 reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 226 reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 227 reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 228 reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 229 reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 230 reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 231 
 232 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
 233 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 234 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 235 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 236 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 237 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 238 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 239 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 240 reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 241 reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 242 reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 243 reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 244 reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 245 reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 246 reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 247 reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 248 
 249 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 250 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 251 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 252 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 253 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 254 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 255 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 256 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 257 reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 258 reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 259 reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 260 reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 261 reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 262 reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 263 reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 264 reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 265 
 266 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 267 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 268 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 269 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 270 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 271 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 272 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 273 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 274 reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 275 reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 276 reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 277 reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 278 reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 279 reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 280 reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 281 reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 282 
 283 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 284 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 285 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 286 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 287 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 288 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 289 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 290 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 291 reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 292 reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 293 reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 294 reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 295 reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 296 reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 297 reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 298 reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 299 
 300 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 301 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 302 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 303 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 304 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 305 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 306 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 307 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 308 reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 309 reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 310 reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 311 reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 312 reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 313 reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 314 reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 315 reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 316 
 317 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 318 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 319 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 320 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 321 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 322 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 323 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 324 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 325 reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 326 reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 327 reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 328 reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 329 reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 330 reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 331 reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 332 reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 333 
 334 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 335 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 336 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 337 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 338 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 339 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 340 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 341 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 342 reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 343 reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 344 reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 345 reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 346 reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 347 reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 348 reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 349 reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 350 
 351 reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
 352 reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 353 reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 354 reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 355 reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 356 reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 357 reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 358 reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 359 reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 360 reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 361 reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 362 reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 363 reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 364 reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 365 reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 366 reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 367 
 368 reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
 369 reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 370 reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 371 reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 372 reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 373 reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 374 reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 375 reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 376 reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 377 reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 378 reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 379 reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 380 reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 381 reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 382 reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 383 reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 384 
 385 reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
 386 reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 387 reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 388 reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 389 reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 390 reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 391 reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 392 reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 393 reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 394 reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 395 reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 396 reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 397 reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 398 reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 399 reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 400 reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 401 
 402 reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
 403 reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 404 reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 405 reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 406 reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 407 reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 408 reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 409 reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 410 reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 411 reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 412 reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 413 reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 414 reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 415 reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 416 reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 417 reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 418 
 419 reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
 420 reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 421 reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 422 reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 423 reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 424 reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 425 reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 426 reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 427 reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 428 reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 429 reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 430 reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 431 reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 432 reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 433 reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 434 reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 435 
 436 reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
 437 reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 438 reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 439 reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 440 reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 441 reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 442 reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 443 reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 444 reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 445 reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 446 reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 447 reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 448 reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 449 reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 450 reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 451 reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 452 
 453 reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
 454 reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 455 reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 456 reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 457 reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 458 reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 459 reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 460 reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 461 reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 462 reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 463 reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 464 reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 465 reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 466 reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 467 reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 468 reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 469 
 470 reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
 471 reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 472 reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 473 reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 474 reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 475 reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 476 reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 477 reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 478 reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 479 reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 480 reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 481 reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 482 reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 483 reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 484 reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 485 reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 486 
 487 reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
 488 reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 489 reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 490 reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 491 reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 492 reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 493 reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 494 reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 495 reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 496 reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 497 reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 498 reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 499 reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 500 reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 501 reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 502 reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 503 
 504 reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
 505 reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 506 reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 507 reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 508 reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 509 reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 510 reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 511 reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 512 reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 513 reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 514 reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 515 reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 516 reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 517 reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 518 reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 519 reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 520 
 521 reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
 522 reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 523 reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 524 reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 525 reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 526 reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 527 reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 528 reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 529 reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 530 reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 531 reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 532 reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 533 reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 534 reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 535 reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 536 reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 537 
 538 reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
     reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 539 reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 540 reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 541 reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 542 reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 543 reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 544 reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 545 reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 546 reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 547 reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 548 reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 549 reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 550 reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 551 reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 552 reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 553 
 554 reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
 555 reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
 556 reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
 557 reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
 558 reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
 559 reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
 560 reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
 561 reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
 562 reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
 563 reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
 564 reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
 565 reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
 566 reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
 567 reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
 568 reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
 569 reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));
 570 
 571 reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
 572 reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
 573 reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
 574 reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
 575 reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
 576 reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
 577 reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
 578 reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
 579 reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
 580 reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
 581 reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
 582 reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
 583 reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
 584 reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
 585 reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
 586 reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));
 587 
 588 reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
 589 reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
 590 reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
 591 reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
 592 reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
 593 reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
 594 reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
 595 reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
 596 reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
 597 reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
 598 reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
 599 reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
 600 reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
 601 reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
 602 reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
 603 reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
 604 
 605 reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
 606 reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
 607 reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
 608 reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
 609 reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
 610 reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
 611 reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
 612 reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
 613 reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
 614 reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
 615 reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
 616 reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
 617 reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
 618 reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
 619 reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
 620 reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));
 621 
 622 #else // _WIN64
 623 
 624 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
 625 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 626 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 627 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 628 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 629 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 630 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 631 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 632 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 633 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 634 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 635 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 636 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 637 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 638 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 639 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 640 
 641 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
 642 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 643 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 644 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 645 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 646 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 647 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 648 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 649 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 650 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 651 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 652 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 653 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 654 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 655 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 656 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 657 
 658 #ifdef _LP64
 659 
 660 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
 661 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 662 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 663 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 664 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 665 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 666 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 667 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 668 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 669 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 670 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 671 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 672 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 673 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 674 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 675 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 676 
 677 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
 678 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 679 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 680 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 681 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 682 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 683 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 684 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 685 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 686 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 687 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 688 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 689 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 690 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 691 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 692 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 693 
 694 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 695 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 696 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 697 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 698 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 699 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 700 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 701 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 702 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 703 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 704 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 705 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 706 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 707 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 708 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 709 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 710 
 711 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 712 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 713 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 714 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 715 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 716 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 717 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 718 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 719 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 720 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 721 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 722 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 723 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 724 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 725 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 726 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 727 
 728 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 729 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 730 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 731 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 732 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 733 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 734 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 735 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 736 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 737 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 738 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 739 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 740 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 741 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 742 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 743 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 744 
 745 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 746 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 747 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 748 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 749 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 750 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 751 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 752 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 753 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 754 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 755 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 756 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 757 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 758 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 759 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 760 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 761 
 762 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 763 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 764 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 765 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 766 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 767 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 768 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 769 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 770 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 771 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 772 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 773 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 774 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 775 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 776 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 777 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 778 
 779 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 780 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 781 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 782 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 783 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 784 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 785 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 786 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 787 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 788 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 789 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 790 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 791 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 792 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 793 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 794 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 795 
 796 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
 797 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 798 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 799 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 800 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 801 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 802 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 803 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 804 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 805 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 806 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 807 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 808 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 809 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 810 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 811 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 812 
 813 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
 814 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 815 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 816 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 817 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 818 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 819 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 820 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 821 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 822 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 823 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 824 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 825 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 826 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 827 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 828 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 829 
 830 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
 831 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 832 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 833 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 834 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 835 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 836 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 837 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 838 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 839 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 840 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 841 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 842 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 843 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 844 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 845 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 846 
 847 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
 848 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 849 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 850 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 851 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 852 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 853 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 854 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 855 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 856 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 857 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 858 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 859 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 860 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 861 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 862 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 863 
 864 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
 865 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 866 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 867 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 868 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 869 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 870 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 871 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 872 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 873 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 874 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 875 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 876 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 877 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 878 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 879 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 880 
 881 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
 882 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 883 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 884 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 885 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 886 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 887 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 888 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 889 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 890 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 891 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 892 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 893 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 894 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 895 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 896 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 897 
 898 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
 899 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 900 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 901 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 902 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 903 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 904 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 905 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 906 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 907 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 908 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 909 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 910 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 911 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 912 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 913 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 914 
 915 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
 916 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 917 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 918 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 919 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 920 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 921 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 922 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 923 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 931 
 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 948 
 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 965 
 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 982 
 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 999 
1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
1016 
1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
1027 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
1033 
1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
1050 
1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
1067 
1068 #endif // _LP64
1069 
1070 #endif // _WIN64
1071 
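// Flags register.  It has no VMReg of its own (VMRegImpl::Bad()) and no ideal
// spill type; the encoding below (16 in LP64, 8 otherwise) presumably just
// follows the number of general purpose registers.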
1072 #ifdef _LP64
1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
1074 #else
1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
1076 #endif // _LP64
1077 
1078 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1079                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1080                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1081                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1082                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1083                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1084                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1085                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1086 #ifdef _LP64
1087                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1088                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1089                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1090                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1091                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1092                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1093                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1094                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1095                   ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1096                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1097                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1098                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1099                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1100                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1101                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1102                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1103                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1104                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1105                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1106                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1107                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1108                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1109                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1110                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1111 #endif
1112                       );
1113 
1114 // flags allocation class should be last.
1115 alloc_class chunk2(RFLAGS);
1116 
1117 // Singleton class for condition codes
1118 reg_class int_flags(RFLAGS);
1119 
1120 // Class for pre evex float registers
1121 reg_class float_reg_legacy(XMM0,
1122                     XMM1,
1123                     XMM2,
1124                     XMM3,
1125                     XMM4,
1126                     XMM5,
1127                     XMM6,
1128                     XMM7
1129 #ifdef _LP64
1130                    ,XMM8,
1131                     XMM9,
1132                     XMM10,
1133                     XMM11,
1134                     XMM12,
1135                     XMM13,
1136                     XMM14,
1137                     XMM15
1138 #endif
1139                     );
1140 
1141 // Class for evex float registers
1142 reg_class float_reg_evex(XMM0,
1143                     XMM1,
1144                     XMM2,
1145                     XMM3,
1146                     XMM4,
1147                     XMM5,
1148                     XMM6,
1149                     XMM7
1150 #ifdef _LP64
1151                    ,XMM8,
1152                     XMM9,
1153                     XMM10,
1154                     XMM11,
1155                     XMM12,
1156                     XMM13,
1157                     XMM14,
1158                     XMM15,
1159                     XMM16,
1160                     XMM17,
1161                     XMM18,
1162                     XMM19,
1163                     XMM20,
1164                     XMM21,
1165                     XMM22,
1166                     XMM23,
1167                     XMM24,
1168                     XMM25,
1169                     XMM26,
1170                     XMM27,
1171                     XMM28,
1172                     XMM29,
1173                     XMM30,
1174                     XMM31
1175 #endif
1176                     );
1177 
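// reg_class_dynamic picks one of the two classes above at startup: the first
// (evex) class when the %{ ... %} predicate holds, the second (legacy) class
// otherwise.  The same pattern is used for the double and vector classes below.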
1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1179 
1180 // Class for pre evex double registers
1181 reg_class double_reg_legacy(XMM0,  XMM0b,
1182                      XMM1,  XMM1b,
1183                      XMM2,  XMM2b,
1184                      XMM3,  XMM3b,
1185                      XMM4,  XMM4b,
1186                      XMM5,  XMM5b,
1187                      XMM6,  XMM6b,
1188                      XMM7,  XMM7b
1189 #ifdef _LP64
1190                     ,XMM8,  XMM8b,
1191                      XMM9,  XMM9b,
1192                      XMM10, XMM10b,
1193                      XMM11, XMM11b,
1194                      XMM12, XMM12b,
1195                      XMM13, XMM13b,
1196                      XMM14, XMM14b,
1197                      XMM15, XMM15b
1198 #endif
1199                      );
1200 
1201 // Class for evex double registers
1202 reg_class double_reg_evex(XMM0,  XMM0b,
1203                      XMM1,  XMM1b,
1204                      XMM2,  XMM2b,
1205                      XMM3,  XMM3b,
1206                      XMM4,  XMM4b,
1207                      XMM5,  XMM5b,
1208                      XMM6,  XMM6b,
1209                      XMM7,  XMM7b
1210 #ifdef _LP64
1211                     ,XMM8,  XMM8b,
1212                      XMM9,  XMM9b,
1213                      XMM10, XMM10b,
1214                      XMM11, XMM11b,
1215                      XMM12, XMM12b,
1216                      XMM13, XMM13b,
1217                      XMM14, XMM14b,
1218                      XMM15, XMM15b,
1219                      XMM16, XMM16b,
1220                      XMM17, XMM17b,
1221                      XMM18, XMM18b,
1222                      XMM19, XMM19b,
1223                      XMM20, XMM20b,
1224                      XMM21, XMM21b,
1225                      XMM22, XMM22b,
1226                      XMM23, XMM23b,
1227                      XMM24, XMM24b,
1228                      XMM25, XMM25b,
1229                      XMM26, XMM26b,
1230                      XMM27, XMM27b,
1231                      XMM28, XMM28b,
1232                      XMM29, XMM29b,
1233                      XMM30, XMM30b,
1234                      XMM31, XMM31b
1235 #endif
1236                      );
1237 
1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1239 
1240 // Class for pre evex 32bit vector registers
1241 reg_class vectors_reg_legacy(XMM0,
1242                       XMM1,
1243                       XMM2,
1244                       XMM3,
1245                       XMM4,
1246                       XMM5,
1247                       XMM6,
1248                       XMM7
1249 #ifdef _LP64
1250                      ,XMM8,
1251                       XMM9,
1252                       XMM10,
1253                       XMM11,
1254                       XMM12,
1255                       XMM13,
1256                       XMM14,
1257                       XMM15
1258 #endif
1259                       );
1260 
1261 // Class for evex 32bit vector registers
1262 reg_class vectors_reg_evex(XMM0,
1263                       XMM1,
1264                       XMM2,
1265                       XMM3,
1266                       XMM4,
1267                       XMM5,
1268                       XMM6,
1269                       XMM7
1270 #ifdef _LP64
1271                      ,XMM8,
1272                       XMM9,
1273                       XMM10,
1274                       XMM11,
1275                       XMM12,
1276                       XMM13,
1277                       XMM14,
1278                       XMM15,
1279                       XMM16,
1280                       XMM17,
1281                       XMM18,
1282                       XMM19,
1283                       XMM20,
1284                       XMM21,
1285                       XMM22,
1286                       XMM23,
1287                       XMM24,
1288                       XMM25,
1289                       XMM26,
1290                       XMM27,
1291                       XMM28,
1292                       XMM29,
1293                       XMM30,
1294                       XMM31
1295 #endif
1296                       );
1297 
1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1299 
// Class for pre evex 64bit vector registers
1301 reg_class vectord_reg_legacy(XMM0,  XMM0b,
1302                       XMM1,  XMM1b,
1303                       XMM2,  XMM2b,
1304                       XMM3,  XMM3b,
1305                       XMM4,  XMM4b,
1306                       XMM5,  XMM5b,
1307                       XMM6,  XMM6b,
1308                       XMM7,  XMM7b
1309 #ifdef _LP64
1310                      ,XMM8,  XMM8b,
1311                       XMM9,  XMM9b,
1312                       XMM10, XMM10b,
1313                       XMM11, XMM11b,
1314                       XMM12, XMM12b,
1315                       XMM13, XMM13b,
1316                       XMM14, XMM14b,
1317                       XMM15, XMM15b
1318 #endif
1319                       );
1320 
// Class for evex 64bit vector registers
1322 reg_class vectord_reg_evex(XMM0,  XMM0b,
1323                       XMM1,  XMM1b,
1324                       XMM2,  XMM2b,
1325                       XMM3,  XMM3b,
1326                       XMM4,  XMM4b,
1327                       XMM5,  XMM5b,
1328                       XMM6,  XMM6b,
1329                       XMM7,  XMM7b
1330 #ifdef _LP64
1331                      ,XMM8,  XMM8b,
1332                       XMM9,  XMM9b,
1333                       XMM10, XMM10b,
1334                       XMM11, XMM11b,
1335                       XMM12, XMM12b,
1336                       XMM13, XMM13b,
1337                       XMM14, XMM14b,
1338                       XMM15, XMM15b,
1339                       XMM16, XMM16b,
1340                       XMM17, XMM17b,
1341                       XMM18, XMM18b,
1342                       XMM19, XMM19b,
1343                       XMM20, XMM20b,
1344                       XMM21, XMM21b,
1345                       XMM22, XMM22b,
1346                       XMM23, XMM23b,
1347                       XMM24, XMM24b,
1348                       XMM25, XMM25b,
1349                       XMM26, XMM26b,
1350                       XMM27, XMM27b,
1351                       XMM28, XMM28b,
1352                       XMM29, XMM29b,
1353                       XMM30, XMM30b,
1354                       XMM31, XMM31b
1355 #endif
1356                       );
1357 
1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1359 
// Class for pre evex 128bit vector registers
1361 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
1362                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1363                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1364                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1365                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1366                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1367                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1368                       XMM7,  XMM7b,  XMM7c,  XMM7d
1369 #ifdef _LP64
1370                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1371                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1372                       XMM10, XMM10b, XMM10c, XMM10d,
1373                       XMM11, XMM11b, XMM11c, XMM11d,
1374                       XMM12, XMM12b, XMM12c, XMM12d,
1375                       XMM13, XMM13b, XMM13c, XMM13d,
1376                       XMM14, XMM14b, XMM14c, XMM14d,
1377                       XMM15, XMM15b, XMM15c, XMM15d
1378 #endif
1379                       );
1380 
// Class for evex 128bit vector registers
1382 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
1383                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1384                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1385                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1386                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1387                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1388                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1389                       XMM7,  XMM7b,  XMM7c,  XMM7d
1390 #ifdef _LP64
1391                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1392                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1393                       XMM10, XMM10b, XMM10c, XMM10d,
1394                       XMM11, XMM11b, XMM11c, XMM11d,
1395                       XMM12, XMM12b, XMM12c, XMM12d,
1396                       XMM13, XMM13b, XMM13c, XMM13d,
1397                       XMM14, XMM14b, XMM14c, XMM14d,
1398                       XMM15, XMM15b, XMM15c, XMM15d,
1399                       XMM16, XMM16b, XMM16c, XMM16d,
1400                       XMM17, XMM17b, XMM17c, XMM17d,
1401                       XMM18, XMM18b, XMM18c, XMM18d,
1402                       XMM19, XMM19b, XMM19c, XMM19d,
1403                       XMM20, XMM20b, XMM20c, XMM20d,
1404                       XMM21, XMM21b, XMM21c, XMM21d,
1405                       XMM22, XMM22b, XMM22c, XMM22d,
1406                       XMM23, XMM23b, XMM23c, XMM23d,
1407                       XMM24, XMM24b, XMM24c, XMM24d,
1408                       XMM25, XMM25b, XMM25c, XMM25d,
1409                       XMM26, XMM26b, XMM26c, XMM26d,
1410                       XMM27, XMM27b, XMM27c, XMM27d,
1411                       XMM28, XMM28b, XMM28c, XMM28d,
1412                       XMM29, XMM29b, XMM29c, XMM29d,
1413                       XMM30, XMM30b, XMM30c, XMM30d,
1414                       XMM31, XMM31b, XMM31c, XMM31d
1415 #endif
1416                       );
1417 
1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1419 
// Class for pre evex 256bit vector registers
1421 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1422                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1423                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1424                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1425                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1426                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1427                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1428                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1429 #ifdef _LP64
1430                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1431                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1432                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1433                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1434                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1435                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1436                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1437                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
1438 #endif
1439                       );
1440 
// Class for evex 256bit vector registers
1442 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1443                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1444                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1445                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1446                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1447                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1448                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1449                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1450 #ifdef _LP64
1451                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1452                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1453                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1454                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1455                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1456                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1457                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1458                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1459                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1460                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1461                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1462                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1463                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1464                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1465                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1466                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1467                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1468                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1469                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1470                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1471                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1472                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1473                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1474                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
1475 #endif
1476                       );
1477 
1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1479 
1480 // Class for all 512bit vector registers
1481 reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1482                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1483                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1484                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1485                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1486                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1487                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1488                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1489 #ifdef _LP64
1490                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1491                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1492                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1493                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1494                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1495                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1496                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1497                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1498                      ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1499                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1500                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1501                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1502                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1503                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1504                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1505                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1506                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1507                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1508                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1509                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1510                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1511                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1512                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1513                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1514 #endif
1515                       );
1516 
1517 %}
1518 
1519 
1520 //----------SOURCE BLOCK-------------------------------------------------------
1521 // This is a block of C++ code which provides values, functions, and
1522 // definitions necessary in the rest of the architecture description
1523 
1524 source_hpp %{
1525 // Header information of the source block.
1526 // Method declarations/definitions which are used outside
1527 // the ad-scope can conveniently be defined here.
1528 //
1529 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
1531 
1532 class NativeJump;
1533 
1534 class CallStubImpl {
1535 
1536   //--------------------------------------------------------------
1537   //---<  Used for optimization in Compile::shorten_branches  >---
1538   //--------------------------------------------------------------
1539 
1540  public:
1541   // Size of call trampoline stub.
1542   static uint size_call_trampoline() {
1543     return 0; // no call trampolines on this platform
1544   }
1545 
1546   // number of relocations needed by a call trampoline stub
1547   static uint reloc_call_trampoline() {
1548     return 0; // no call trampolines on this platform
1549   }
1550 };
1551 
1552 class HandlerImpl {
1553 
1554  public:
1555 
1556   static int emit_exception_handler(CodeBuffer &cbuf);
1557   static int emit_deopt_handler(CodeBuffer& cbuf);
1558 
1559   static uint size_exception_handler() {
1560     // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1563     // Note that this value is also credited (in output.cpp) to
1564     // the size of the code section.
1565     return NativeJump::instruction_size;
1566   }
1567 
1568 #ifdef _LP64
1569   static uint size_deopt_handler() {
    // three 5-byte instructions
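    // (the call that pushes the PC, the subptr that adjusts it on the stack,
    //  and the jmp to the deopt blob in emit_deopt_handler() below each
    //  encode in 5 bytes)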
1571     return 15;
1572   }
1573 #else
1574   static uint size_deopt_handler() {
1575     // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1578     // Note that this value is also credited (in output.cpp) to
1579     // the size of the code section.
1580     return 5 + NativeJump::instruction_size; // pushl(); jmp;
1581   }
1582 #endif
1583 };
1584 
1585 %} // end source_hpp
1586 
1587 source %{
1588 
1589 // Emit exception handler code.
1590 // Stuff framesize into a register and call a VM stub routine.
1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1592 
1593   // Note that the code buffer's insts_mark is always relative to insts.
1594   // That's why we must use the macroassembler to generate a handler.
1595   MacroAssembler _masm(&cbuf);
1596   address base = __ start_a_stub(size_exception_handler());
1597   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1598   int offset = __ offset();
1599   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1600   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1601   __ end_a_stub();
1602   return offset;
1603 }
1604 
1605 // Emit deopt handler code.
1606 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1607 
1608   // Note that the code buffer's insts_mark is always relative to insts.
1609   // That's why we must use the macroassembler to generate a handler.
1610   MacroAssembler _masm(&cbuf);
1611   address base = __ start_a_stub(size_deopt_handler());
1612   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1613   int offset = __ offset();
1614 
1615 #ifdef _LP64
1616   address the_pc = (address) __ pc();
1617   Label next;
1618   // push a "the_pc" on the stack without destroying any registers
1619   // as they all may be live.
1620 
1621   // push address of "next"
1622   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1623   __ bind(next);
1624   // adjust it so it matches "the_pc"
1625   __ subptr(Address(rsp, 0), __ offset() - offset);
1626 #else
1627   InternalAddress here(__ pc());
1628   __ pushptr(here.addr());
1629 #endif
1630 
1631   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1632   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1633   __ end_a_stub();
1634   return offset;
1635 }
1636 
1637 
1638 //=============================================================================
1639 
1640   // Float masks come from different places depending on platform.
1641 #ifdef _LP64
1642   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
1643   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
1644   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1645   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1646 #else
1647   static address float_signmask()  { return (address)float_signmask_pool; }
1648   static address float_signflip()  { return (address)float_signflip_pool; }
1649   static address double_signmask() { return (address)double_signmask_pool; }
1650   static address double_signflip() { return (address)double_signflip_pool; }
1651 #endif
1652 
1653 
1654 const bool Matcher::match_rule_supported(int opcode) {
1655   if (!has_match_rule(opcode))
1656     return false;
1657 
1658   switch (opcode) {
1659     case Op_PopCountI:
1660     case Op_PopCountL:
1661       if (!UsePopCountInstruction)
1662         return false;
1663     break;
1664     case Op_MulVI:
1665       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1666         return false;
1667     break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        return false;
    break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        return false;
    break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        return false;
    break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        return false;
    break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        return false;
    break;
1688     case Op_CompareAndSwapL:
1689 #ifdef _LP64
1690     case Op_CompareAndSwapP:
1691 #endif
1692       if (!VM_Version::supports_cx8())
1693         return false;
1694     break;
1695   }
1696 
  return true;  // By default match rules are supported.
1698 }
1699 
1700 // Max vector size in bytes. 0 if not supported.
1701 const int Matcher::vector_width_in_bytes(BasicType bt) {
1702   assert(is_java_primitive(bt), "only primitive type vectors");
1703   if (UseSSE < 2) return 0;
1704   // SSE2 supports 128bit vectors for all types.
1705   // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
1707   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
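  // e.g. UseAVX == 2 gives 32 bytes (256-bit), UseAVX == 3 gives 64 bytes (512-bit).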
1708   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1709   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1710     size = (UseAVX > 2) ? 64 : 32;
1711   // Use flag to limit vector size.
1712   size = MIN2(size,(int)MaxVectorSize);
1713   // Minimum 2 values in vector (or 4 for bytes).
1714   switch (bt) {
1715   case T_DOUBLE:
1716   case T_LONG:
1717     if (size < 16) return 0;
1718   case T_FLOAT:
1719   case T_INT:
1720     if (size < 8) return 0;
1721   case T_BOOLEAN:
1722   case T_BYTE:
1723   case T_CHAR:
1724   case T_SHORT:
1725     if (size < 4) return 0;
1726     break;
1727   default:
1728     ShouldNotReachHere();
1729   }
1730   return size;
1731 }
1732 
1733 // Limits on vector size (number of elements) loaded into vector.
1734 const int Matcher::max_vector_size(const BasicType bt) {
1735   return vector_width_in_bytes(bt)/type2aelembytes(bt);
1736 }
1737 const int Matcher::min_vector_size(const BasicType bt) {
1738   int max_size = max_vector_size(bt);
1739   // Min size which can be loaded into vector is 4 bytes.
1740   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
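  // i.e. at least 4 elements for byte vectors and at least 2 for wider element types.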
1741   return MIN2(size,max_size);
1742 }
1743 
// Vector ideal reg corresponding to specified size in bytes
1745 const int Matcher::vector_ideal_reg(int size) {
1746   assert(MaxVectorSize >= size, "");
1747   switch(size) {
1748     case  4: return Op_VecS;
1749     case  8: return Op_VecD;
1750     case 16: return Op_VecX;
1751     case 32: return Op_VecY;
1752     case 64: return Op_VecZ;
1753   }
1754   ShouldNotReachHere();
1755   return 0;
1756 }
1757 
1758 // Only lowest bits of xmm reg are used for vector shift count.
1759 const int Matcher::vector_shift_count_ideal_reg(int size) {
1760   return Op_VecS;
1761 }
1762 
1763 // x86 supports misaligned vectors store/load.
1764 const bool Matcher::misaligned_vectors_ok() {
1765   return !AlignVector; // can be changed by flag
1766 }
1767 
1768 // x86 AES instructions are compatible with SunJCE expanded
1769 // keys, hence we do not need to pass the original key to stubs
1770 const bool Matcher::pass_original_key_for_aes() {
1771   return false;
1772 }
1773 
1774 // Helper methods for MachSpillCopyNode::implementation().
1775 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1776                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM size calculation is very complex, so the size is
  // determined by emitting the instructions into a scratch buffer instead.
1779   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1780   assert(ireg == Op_VecS || // 32bit vector
1781          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1782          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1783          "no non-adjacent vector moves" );
1784   if (cbuf) {
1785     MacroAssembler _masm(cbuf);
1786     int offset = __ offset();
1787     switch (ireg) {
1788     case Op_VecS: // copy whole register
1789     case Op_VecD:
1790     case Op_VecX:
1791       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1792       break;
1793     case Op_VecY:
1794       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1795       break;
1796     case Op_VecZ:
1797       __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1798       break;
1799     default:
1800       ShouldNotReachHere();
1801     }
1802     int size = __ offset() - offset;
1803 #ifdef ASSERT
1804     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
1806 #endif
1807     return size;
1808 #ifndef PRODUCT
1809   } else if (!do_size) {
1810     switch (ireg) {
1811     case Op_VecS:
1812     case Op_VecD:
1813     case Op_VecX:
1814       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1815       break;
1816     case Op_VecY:
1817     case Op_VecZ:
1818       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1819       break;
1820     default:
1821       ShouldNotReachHere();
1822     }
1823 #endif
1824   }
1825   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
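  // With UseAVX > 2 the copy may be EVEX-encoded (4-byte prefix), giving a
  // 6-byte instruction; the SSE/VEX encodings take 4 bytes.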
1826   return (UseAVX > 2) ? 6 : 4;
1827 }
1828 
1829 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1830                             int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM size calculation is very complex, so the size is
  // determined by emitting the instructions into a scratch buffer instead.
1833   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1834   if (cbuf) {
1835     MacroAssembler _masm(cbuf);
1836     int offset = __ offset();
1837     if (is_load) {
1838       switch (ireg) {
1839       case Op_VecS:
1840         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1841         break;
1842       case Op_VecD:
1843         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1844         break;
1845       case Op_VecX:
1846         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1847         break;
1848       case Op_VecY:
1849         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1850         break;
1851       case Op_VecZ:
1852         __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1853         break;
1854       default:
1855         ShouldNotReachHere();
1856       }
1857     } else { // store
1858       switch (ireg) {
1859       case Op_VecS:
1860         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1861         break;
1862       case Op_VecD:
1863         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1864         break;
1865       case Op_VecX:
1866         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1867         break;
1868       case Op_VecY:
1869         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1870         break;
1871       case Op_VecZ:
1872         __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1873         break;
1874       default:
1875         ShouldNotReachHere();
1876       }
1877     }
1878     int size = __ offset() - offset;
1879 #ifdef ASSERT
1880     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1881     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
1883 #endif
1884     return size;
1885 #ifndef PRODUCT
1886   } else if (!do_size) {
1887     if (is_load) {
1888       switch (ireg) {
1889       case Op_VecS:
1890         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1891         break;
1892       case Op_VecD:
1893         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1894         break;
      case Op_VecX:
1896         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1897         break;
1898       case Op_VecY:
1899       case Op_VecZ:
1900         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1901         break;
1902       default:
1903         ShouldNotReachHere();
1904       }
1905     } else { // store
1906       switch (ireg) {
1907       case Op_VecS:
1908         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1909         break;
1910       case Op_VecD:
1911         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1912         break;
      case Op_VecX:
1914         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1915         break;
1916       case Op_VecY:
1917       case Op_VecZ:
1918         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1919         break;
1920       default:
1921         ShouldNotReachHere();
1922       }
1923     }
1924 #endif
1925   }
1926   int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1927   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1928   return 5+offset_size;
1929 }
1930 
1931 static inline jfloat replicate4_imm(int con, int width) {
1932   // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
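  // e.g. con = 0x1A with width == 1 produces the bit pattern 0x1A1A1A1A,
  // returned reinterpreted as a jfloat.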
1933   assert(width == 1 || width == 2, "only byte or short types here");
1934   int bit_width = width * 8;
1935   jint val = con;
1936   val &= (1 << bit_width) - 1;  // mask off sign bits
1937   while(bit_width < 32) {
1938     val |= (val << bit_width);
1939     bit_width <<= 1;
1940   }
1941   jfloat fval = *((jfloat*) &val);  // coerce to float type
1942   return fval;
1943 }
1944 
1945 static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
1947   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
1948   int bit_width = width * 8;
1949   jlong val = con;
1950   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
1951   while(bit_width < 64) {
1952     val |= (val << bit_width);
1953     bit_width <<= 1;
1954   }
1955   jdouble dval = *((jdouble*) &val);  // coerce to double type
1956   return dval;
1957 }
1958 
1959 #ifndef PRODUCT
1960   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
1961     st->print("nop \t# %d bytes pad for loops and calls", _count);
1962   }
1963 #endif
1964 
1965   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1966     MacroAssembler _masm(&cbuf);
1967     __ nop(_count);
1968   }
1969 
1970   uint MachNopNode::size(PhaseRegAlloc*) const {
1971     return _count;
1972   }
1973 
1974 #ifndef PRODUCT
1975   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
1976     st->print("# breakpoint");
1977   }
1978 #endif
1979 
1980   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
1981     MacroAssembler _masm(&cbuf);
1982     __ int3();
1983   }
1984 
1985   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
1986     return MachNode::size(ra_);
1987   }
1988 
1989 %}
1990 
1991 encode %{
1992 
1993   enc_class call_epilog %{
1994     if (VerifyStackAtCalls) {
1995       // Check that stack depth is unchanged: find majik cookie on stack
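      // (The 0xbadb100d cookie is assumed to be written a fixed number of slots below the
      //  caller's old SP by the frame-setup code when VerifyStackAtCalls is enabled; if it
      //  is not found here, the stack depth changed across the call and we trap below.)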
1996       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
1997       MacroAssembler _masm(&cbuf);
1998       Label L;
1999       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2000       __ jccb(Assembler::equal, L);
2001       // Die if stack mismatch
2002       __ int3();
2003       __ bind(L);
2004     }
2005   %}
2006 
2007 %}
2008 
2009 
2010 //----------OPERANDS-----------------------------------------------------------
2011 // Operand definitions must precede instruction definitions for correct parsing
2012 // in the ADLC because operands constitute user defined types which are used in
2013 // instruction definitions.
2014 
// This operand applies only to EVEX (AVX-512) hardware, so only one version is needed.
2016 operand vecZ() %{
2017   constraint(ALLOC_IN_RC(vectorz_reg));
2018   match(VecZ);
2019 
2020   format %{ %}
2021   interface(REG_INTER);
2022 %}
2023 
2024 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2025 
2026 // ============================================================================
2027 
2028 instruct ShouldNotReachHere() %{
2029   match(Halt);
2030   format %{ "int3\t# ShouldNotReachHere" %}
2031   ins_encode %{
2032     __ int3();
2033   %}
2034   ins_pipe(pipe_slow);
2035 %}
2036 
2037 // ============================================================================
2038 
2039 instruct addF_reg(regF dst, regF src) %{
2040   predicate((UseSSE>=1) && (UseAVX == 0));
2041   match(Set dst (AddF dst src));
2042 
2043   format %{ "addss   $dst, $src" %}
2044   ins_cost(150);
2045   ins_encode %{
2046     __ addss($dst$$XMMRegister, $src$$XMMRegister);
2047   %}
2048   ins_pipe(pipe_slow);
2049 %}
2050 
2051 instruct addF_mem(regF dst, memory src) %{
2052   predicate((UseSSE>=1) && (UseAVX == 0));
2053   match(Set dst (AddF dst (LoadF src)));
2054 
2055   format %{ "addss   $dst, $src" %}
2056   ins_cost(150);
2057   ins_encode %{
2058     __ addss($dst$$XMMRegister, $src$$Address);
2059   %}
2060   ins_pipe(pipe_slow);
2061 %}
2062 
2063 instruct addF_imm(regF dst, immF con) %{
2064   predicate((UseSSE>=1) && (UseAVX == 0));
2065   match(Set dst (AddF dst con));
2066   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2067   ins_cost(150);
2068   ins_encode %{
2069     __ addss($dst$$XMMRegister, $constantaddress($con));
2070   %}
2071   ins_pipe(pipe_slow);
2072 %}
2073 
2074 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
2075   predicate(UseAVX > 0);
2076   match(Set dst (AddF src1 src2));
2077 
2078   format %{ "vaddss  $dst, $src1, $src2" %}
2079   ins_cost(150);
2080   ins_encode %{
2081     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2082   %}
2083   ins_pipe(pipe_slow);
2084 %}
2085 
2086 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
2087   predicate(UseAVX > 0);
2088   match(Set dst (AddF src1 (LoadF src2)));
2089 
2090   format %{ "vaddss  $dst, $src1, $src2" %}
2091   ins_cost(150);
2092   ins_encode %{
2093     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2094   %}
2095   ins_pipe(pipe_slow);
2096 %}
2097 
2098 instruct addF_reg_imm(regF dst, regF src, immF con) %{
2099   predicate(UseAVX > 0);
2100   match(Set dst (AddF src con));
2101 
2102   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2103   ins_cost(150);
2104   ins_encode %{
2105     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2106   %}
2107   ins_pipe(pipe_slow);
2108 %}
2109 
2110 instruct addD_reg(regD dst, regD src) %{
2111   predicate((UseSSE>=2) && (UseAVX == 0));
2112   match(Set dst (AddD dst src));
2113 
2114   format %{ "addsd   $dst, $src" %}
2115   ins_cost(150);
2116   ins_encode %{
2117     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
2118   %}
2119   ins_pipe(pipe_slow);
2120 %}
2121 
2122 instruct addD_mem(regD dst, memory src) %{
2123   predicate((UseSSE>=2) && (UseAVX == 0));
2124   match(Set dst (AddD dst (LoadD src)));
2125 
2126   format %{ "addsd   $dst, $src" %}
2127   ins_cost(150);
2128   ins_encode %{
2129     __ addsd($dst$$XMMRegister, $src$$Address);
2130   %}
2131   ins_pipe(pipe_slow);
2132 %}
2133 
2134 instruct addD_imm(regD dst, immD con) %{
2135   predicate((UseSSE>=2) && (UseAVX == 0));
2136   match(Set dst (AddD dst con));
2137   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2138   ins_cost(150);
2139   ins_encode %{
2140     __ addsd($dst$$XMMRegister, $constantaddress($con));
2141   %}
2142   ins_pipe(pipe_slow);
2143 %}
2144 
2145 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
2146   predicate(UseAVX > 0);
2147   match(Set dst (AddD src1 src2));
2148 
2149   format %{ "vaddsd  $dst, $src1, $src2" %}
2150   ins_cost(150);
2151   ins_encode %{
2152     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2153   %}
2154   ins_pipe(pipe_slow);
2155 %}
2156 
2157 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
2158   predicate(UseAVX > 0);
2159   match(Set dst (AddD src1 (LoadD src2)));
2160 
2161   format %{ "vaddsd  $dst, $src1, $src2" %}
2162   ins_cost(150);
2163   ins_encode %{
2164     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2165   %}
2166   ins_pipe(pipe_slow);
2167 %}
2168 
2169 instruct addD_reg_imm(regD dst, regD src, immD con) %{
2170   predicate(UseAVX > 0);
2171   match(Set dst (AddD src con));
2172 
2173   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2174   ins_cost(150);
2175   ins_encode %{
2176     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2177   %}
2178   ins_pipe(pipe_slow);
2179 %}
2180 
2181 instruct subF_reg(regF dst, regF src) %{
2182   predicate((UseSSE>=1) && (UseAVX == 0));
2183   match(Set dst (SubF dst src));
2184 
2185   format %{ "subss   $dst, $src" %}
2186   ins_cost(150);
2187   ins_encode %{
2188     __ subss($dst$$XMMRegister, $src$$XMMRegister);
2189   %}
2190   ins_pipe(pipe_slow);
2191 %}
2192 
2193 instruct subF_mem(regF dst, memory src) %{
2194   predicate((UseSSE>=1) && (UseAVX == 0));
2195   match(Set dst (SubF dst (LoadF src)));
2196 
2197   format %{ "subss   $dst, $src" %}
2198   ins_cost(150);
2199   ins_encode %{
2200     __ subss($dst$$XMMRegister, $src$$Address);
2201   %}
2202   ins_pipe(pipe_slow);
2203 %}
2204 
2205 instruct subF_imm(regF dst, immF con) %{
2206   predicate((UseSSE>=1) && (UseAVX == 0));
2207   match(Set dst (SubF dst con));
2208   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2209   ins_cost(150);
2210   ins_encode %{
2211     __ subss($dst$$XMMRegister, $constantaddress($con));
2212   %}
2213   ins_pipe(pipe_slow);
2214 %}
2215 
2216 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
2217   predicate(UseAVX > 0);
2218   match(Set dst (SubF src1 src2));
2219 
2220   format %{ "vsubss  $dst, $src1, $src2" %}
2221   ins_cost(150);
2222   ins_encode %{
2223     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2224   %}
2225   ins_pipe(pipe_slow);
2226 %}
2227 
2228 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
2229   predicate(UseAVX > 0);
2230   match(Set dst (SubF src1 (LoadF src2)));
2231 
2232   format %{ "vsubss  $dst, $src1, $src2" %}
2233   ins_cost(150);
2234   ins_encode %{
2235     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2236   %}
2237   ins_pipe(pipe_slow);
2238 %}
2239 
2240 instruct subF_reg_imm(regF dst, regF src, immF con) %{
2241   predicate(UseAVX > 0);
2242   match(Set dst (SubF src con));
2243 
2244   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2245   ins_cost(150);
2246   ins_encode %{
2247     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2248   %}
2249   ins_pipe(pipe_slow);
2250 %}
2251 
2252 instruct subD_reg(regD dst, regD src) %{
2253   predicate((UseSSE>=2) && (UseAVX == 0));
2254   match(Set dst (SubD dst src));
2255 
2256   format %{ "subsd   $dst, $src" %}
2257   ins_cost(150);
2258   ins_encode %{
2259     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
2260   %}
2261   ins_pipe(pipe_slow);
2262 %}
2263 
2264 instruct subD_mem(regD dst, memory src) %{
2265   predicate((UseSSE>=2) && (UseAVX == 0));
2266   match(Set dst (SubD dst (LoadD src)));
2267 
2268   format %{ "subsd   $dst, $src" %}
2269   ins_cost(150);
2270   ins_encode %{
2271     __ subsd($dst$$XMMRegister, $src$$Address);
2272   %}
2273   ins_pipe(pipe_slow);
2274 %}
2275 
2276 instruct subD_imm(regD dst, immD con) %{
2277   predicate((UseSSE>=2) && (UseAVX == 0));
2278   match(Set dst (SubD dst con));
2279   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2280   ins_cost(150);
2281   ins_encode %{
2282     __ subsd($dst$$XMMRegister, $constantaddress($con));
2283   %}
2284   ins_pipe(pipe_slow);
2285 %}
2286 
2287 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
2288   predicate(UseAVX > 0);
2289   match(Set dst (SubD src1 src2));
2290 
2291   format %{ "vsubsd  $dst, $src1, $src2" %}
2292   ins_cost(150);
2293   ins_encode %{
2294     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2295   %}
2296   ins_pipe(pipe_slow);
2297 %}
2298 
2299 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
2300   predicate(UseAVX > 0);
2301   match(Set dst (SubD src1 (LoadD src2)));
2302 
2303   format %{ "vsubsd  $dst, $src1, $src2" %}
2304   ins_cost(150);
2305   ins_encode %{
2306     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2307   %}
2308   ins_pipe(pipe_slow);
2309 %}
2310 
2311 instruct subD_reg_imm(regD dst, regD src, immD con) %{
2312   predicate(UseAVX > 0);
2313   match(Set dst (SubD src con));
2314 
2315   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2316   ins_cost(150);
2317   ins_encode %{
2318     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2319   %}
2320   ins_pipe(pipe_slow);
2321 %}
2322 
2323 instruct mulF_reg(regF dst, regF src) %{
2324   predicate((UseSSE>=1) && (UseAVX == 0));
2325   match(Set dst (MulF dst src));
2326 
2327   format %{ "mulss   $dst, $src" %}
2328   ins_cost(150);
2329   ins_encode %{
2330     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
2331   %}
2332   ins_pipe(pipe_slow);
2333 %}
2334 
2335 instruct mulF_mem(regF dst, memory src) %{
2336   predicate((UseSSE>=1) && (UseAVX == 0));
2337   match(Set dst (MulF dst (LoadF src)));
2338 
2339   format %{ "mulss   $dst, $src" %}
2340   ins_cost(150);
2341   ins_encode %{
2342     __ mulss($dst$$XMMRegister, $src$$Address);
2343   %}
2344   ins_pipe(pipe_slow);
2345 %}
2346 
2347 instruct mulF_imm(regF dst, immF con) %{
2348   predicate((UseSSE>=1) && (UseAVX == 0));
2349   match(Set dst (MulF dst con));
2350   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2351   ins_cost(150);
2352   ins_encode %{
2353     __ mulss($dst$$XMMRegister, $constantaddress($con));
2354   %}
2355   ins_pipe(pipe_slow);
2356 %}
2357 
2358 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
2359   predicate(UseAVX > 0);
2360   match(Set dst (MulF src1 src2));
2361 
2362   format %{ "vmulss  $dst, $src1, $src2" %}
2363   ins_cost(150);
2364   ins_encode %{
2365     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2366   %}
2367   ins_pipe(pipe_slow);
2368 %}
2369 
2370 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
2371   predicate(UseAVX > 0);
2372   match(Set dst (MulF src1 (LoadF src2)));
2373 
2374   format %{ "vmulss  $dst, $src1, $src2" %}
2375   ins_cost(150);
2376   ins_encode %{
2377     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2378   %}
2379   ins_pipe(pipe_slow);
2380 %}
2381 
2382 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
2383   predicate(UseAVX > 0);
2384   match(Set dst (MulF src con));
2385 
2386   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2387   ins_cost(150);
2388   ins_encode %{
2389     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2390   %}
2391   ins_pipe(pipe_slow);
2392 %}
2393 
2394 instruct mulD_reg(regD dst, regD src) %{
2395   predicate((UseSSE>=2) && (UseAVX == 0));
2396   match(Set dst (MulD dst src));
2397 
2398   format %{ "mulsd   $dst, $src" %}
2399   ins_cost(150);
2400   ins_encode %{
2401     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
2402   %}
2403   ins_pipe(pipe_slow);
2404 %}
2405 
2406 instruct mulD_mem(regD dst, memory src) %{
2407   predicate((UseSSE>=2) && (UseAVX == 0));
2408   match(Set dst (MulD dst (LoadD src)));
2409 
2410   format %{ "mulsd   $dst, $src" %}
2411   ins_cost(150);
2412   ins_encode %{
2413     __ mulsd($dst$$XMMRegister, $src$$Address);
2414   %}
2415   ins_pipe(pipe_slow);
2416 %}
2417 
2418 instruct mulD_imm(regD dst, immD con) %{
2419   predicate((UseSSE>=2) && (UseAVX == 0));
2420   match(Set dst (MulD dst con));
2421   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2422   ins_cost(150);
2423   ins_encode %{
2424     __ mulsd($dst$$XMMRegister, $constantaddress($con));
2425   %}
2426   ins_pipe(pipe_slow);
2427 %}
2428 
2429 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
2430   predicate(UseAVX > 0);
2431   match(Set dst (MulD src1 src2));
2432 
2433   format %{ "vmulsd  $dst, $src1, $src2" %}
2434   ins_cost(150);
2435   ins_encode %{
2436     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2437   %}
2438   ins_pipe(pipe_slow);
2439 %}
2440 
2441 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
2442   predicate(UseAVX > 0);
2443   match(Set dst (MulD src1 (LoadD src2)));
2444 
2445   format %{ "vmulsd  $dst, $src1, $src2" %}
2446   ins_cost(150);
2447   ins_encode %{
2448     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2449   %}
2450   ins_pipe(pipe_slow);
2451 %}
2452 
2453 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
2454   predicate(UseAVX > 0);
2455   match(Set dst (MulD src con));
2456 
2457   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2458   ins_cost(150);
2459   ins_encode %{
2460     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2461   %}
2462   ins_pipe(pipe_slow);
2463 %}
2464 
2465 instruct divF_reg(regF dst, regF src) %{
2466   predicate((UseSSE>=1) && (UseAVX == 0));
2467   match(Set dst (DivF dst src));
2468 
2469   format %{ "divss   $dst, $src" %}
2470   ins_cost(150);
2471   ins_encode %{
2472     __ divss($dst$$XMMRegister, $src$$XMMRegister);
2473   %}
2474   ins_pipe(pipe_slow);
2475 %}
2476 
2477 instruct divF_mem(regF dst, memory src) %{
2478   predicate((UseSSE>=1) && (UseAVX == 0));
2479   match(Set dst (DivF dst (LoadF src)));
2480 
2481   format %{ "divss   $dst, $src" %}
2482   ins_cost(150);
2483   ins_encode %{
2484     __ divss($dst$$XMMRegister, $src$$Address);
2485   %}
2486   ins_pipe(pipe_slow);
2487 %}
2488 
2489 instruct divF_imm(regF dst, immF con) %{
2490   predicate((UseSSE>=1) && (UseAVX == 0));
2491   match(Set dst (DivF dst con));
2492   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2493   ins_cost(150);
2494   ins_encode %{
2495     __ divss($dst$$XMMRegister, $constantaddress($con));
2496   %}
2497   ins_pipe(pipe_slow);
2498 %}
2499 
2500 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
2501   predicate(UseAVX > 0);
2502   match(Set dst (DivF src1 src2));
2503 
2504   format %{ "vdivss  $dst, $src1, $src2" %}
2505   ins_cost(150);
2506   ins_encode %{
2507     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2508   %}
2509   ins_pipe(pipe_slow);
2510 %}
2511 
2512 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
2513   predicate(UseAVX > 0);
2514   match(Set dst (DivF src1 (LoadF src2)));
2515 
2516   format %{ "vdivss  $dst, $src1, $src2" %}
2517   ins_cost(150);
2518   ins_encode %{
2519     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2520   %}
2521   ins_pipe(pipe_slow);
2522 %}
2523 
2524 instruct divF_reg_imm(regF dst, regF src, immF con) %{
2525   predicate(UseAVX > 0);
2526   match(Set dst (DivF src con));
2527 
2528   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2529   ins_cost(150);
2530   ins_encode %{
2531     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2532   %}
2533   ins_pipe(pipe_slow);
2534 %}
2535 
2536 instruct divD_reg(regD dst, regD src) %{
2537   predicate((UseSSE>=2) && (UseAVX == 0));
2538   match(Set dst (DivD dst src));
2539 
2540   format %{ "divsd   $dst, $src" %}
2541   ins_cost(150);
2542   ins_encode %{
2543     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
2544   %}
2545   ins_pipe(pipe_slow);
2546 %}
2547 
2548 instruct divD_mem(regD dst, memory src) %{
2549   predicate((UseSSE>=2) && (UseAVX == 0));
2550   match(Set dst (DivD dst (LoadD src)));
2551 
2552   format %{ "divsd   $dst, $src" %}
2553   ins_cost(150);
2554   ins_encode %{
2555     __ divsd($dst$$XMMRegister, $src$$Address);
2556   %}
2557   ins_pipe(pipe_slow);
2558 %}
2559 
2560 instruct divD_imm(regD dst, immD con) %{
2561   predicate((UseSSE>=2) && (UseAVX == 0));
2562   match(Set dst (DivD dst con));
2563   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2564   ins_cost(150);
2565   ins_encode %{
2566     __ divsd($dst$$XMMRegister, $constantaddress($con));
2567   %}
2568   ins_pipe(pipe_slow);
2569 %}
2570 
2571 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
2572   predicate(UseAVX > 0);
2573   match(Set dst (DivD src1 src2));
2574 
2575   format %{ "vdivsd  $dst, $src1, $src2" %}
2576   ins_cost(150);
2577   ins_encode %{
2578     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2579   %}
2580   ins_pipe(pipe_slow);
2581 %}
2582 
2583 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
2584   predicate(UseAVX > 0);
2585   match(Set dst (DivD src1 (LoadD src2)));
2586 
2587   format %{ "vdivsd  $dst, $src1, $src2" %}
2588   ins_cost(150);
2589   ins_encode %{
2590     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2591   %}
2592   ins_pipe(pipe_slow);
2593 %}
2594 
2595 instruct divD_reg_imm(regD dst, regD src, immD con) %{
2596   predicate(UseAVX > 0);
2597   match(Set dst (DivD src con));
2598 
2599   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2600   ins_cost(150);
2601   ins_encode %{
2602     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2603   %}
2604   ins_pipe(pipe_slow);
2605 %}
2606 
2607 instruct absF_reg(regF dst) %{
2608   predicate((UseSSE>=1) && (UseAVX == 0));
2609   match(Set dst (AbsF dst));
2610   ins_cost(150);
2611   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
2612   ins_encode %{
2613     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
2614   %}
2615   ins_pipe(pipe_slow);
2616 %}
2617 
2618 instruct absF_reg_reg(regF dst, regF src) %{
2619   predicate(UseAVX > 0);
2620   match(Set dst (AbsF src));
2621   ins_cost(150);
2622   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2623   ins_encode %{
2624     int vector_len = 0;
2625     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2626               ExternalAddress(float_signmask()), vector_len);
2627   %}
2628   ins_pipe(pipe_slow);
2629 %}
2630 
2631 instruct absD_reg(regD dst) %{
2632   predicate((UseSSE>=2) && (UseAVX == 0));
2633   match(Set dst (AbsD dst));
2634   ins_cost(150);
2635   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
2636             "# abs double by sign masking" %}
2637   ins_encode %{
2638     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
2639   %}
2640   ins_pipe(pipe_slow);
2641 %}
2642 
2643 instruct absD_reg_reg(regD dst, regD src) %{
2644   predicate(UseAVX > 0);
2645   match(Set dst (AbsD src));
2646   ins_cost(150);
2647   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
2648             "# abs double by sign masking" %}
2649   ins_encode %{
2650     int vector_len = 0;
2651     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2652               ExternalAddress(double_signmask()), vector_len);
2653   %}
2654   ins_pipe(pipe_slow);
2655 %}
2656 
2657 instruct negF_reg(regF dst) %{
2658   predicate((UseSSE>=1) && (UseAVX == 0));
2659   match(Set dst (NegF dst));
2660   ins_cost(150);
2661   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
2662   ins_encode %{
2663     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
2664   %}
2665   ins_pipe(pipe_slow);
2666 %}
2667 
2668 instruct negF_reg_reg(regF dst, regF src) %{
2669   predicate(UseAVX > 0);
2670   match(Set dst (NegF src));
2671   ins_cost(150);
2672   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
2673   ins_encode %{
2674     int vector_len = 0;
2675     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
2676               ExternalAddress(float_signflip()), vector_len);
2677   %}
2678   ins_pipe(pipe_slow);
2679 %}
2680 
2681 instruct negD_reg(regD dst) %{
2682   predicate((UseSSE>=2) && (UseAVX == 0));
2683   match(Set dst (NegD dst));
2684   ins_cost(150);
2685   format %{ "xorpd   $dst, [0x8000000000000000]\t"
2686             "# neg double by sign flipping" %}
2687   ins_encode %{
2688     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
2689   %}
2690   ins_pipe(pipe_slow);
2691 %}
2692 
2693 instruct negD_reg_reg(regD dst, regD src) %{
2694   predicate(UseAVX > 0);
2695   match(Set dst (NegD src));
2696   ins_cost(150);
2697   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
2698             "# neg double by sign flipping" %}
2699   ins_encode %{
2700     int vector_len = 0;
2701     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
2702               ExternalAddress(double_signflip()), vector_len);
2703   %}
2704   ins_pipe(pipe_slow);
2705 %}
2706 
2707 instruct sqrtF_reg(regF dst, regF src) %{
2708   predicate(UseSSE>=1);
2709   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
2710 
2711   format %{ "sqrtss  $dst, $src" %}
2712   ins_cost(150);
2713   ins_encode %{
2714     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
2715   %}
2716   ins_pipe(pipe_slow);
2717 %}
2718 
2719 instruct sqrtF_mem(regF dst, memory src) %{
2720   predicate(UseSSE>=1);
2721   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
2722 
2723   format %{ "sqrtss  $dst, $src" %}
2724   ins_cost(150);
2725   ins_encode %{
2726     __ sqrtss($dst$$XMMRegister, $src$$Address);
2727   %}
2728   ins_pipe(pipe_slow);
2729 %}
2730 
2731 instruct sqrtF_imm(regF dst, immF con) %{
2732   predicate(UseSSE>=1);
2733   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
2734   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2735   ins_cost(150);
2736   ins_encode %{
2737     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
2738   %}
2739   ins_pipe(pipe_slow);
2740 %}
2741 
2742 instruct sqrtD_reg(regD dst, regD src) %{
2743   predicate(UseSSE>=2);
2744   match(Set dst (SqrtD src));
2745 
2746   format %{ "sqrtsd  $dst, $src" %}
2747   ins_cost(150);
2748   ins_encode %{
2749     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
2750   %}
2751   ins_pipe(pipe_slow);
2752 %}
2753 
2754 instruct sqrtD_mem(regD dst, memory src) %{
2755   predicate(UseSSE>=2);
2756   match(Set dst (SqrtD (LoadD src)));
2757 
2758   format %{ "sqrtsd  $dst, $src" %}
2759   ins_cost(150);
2760   ins_encode %{
2761     __ sqrtsd($dst$$XMMRegister, $src$$Address);
2762   %}
2763   ins_pipe(pipe_slow);
2764 %}
2765 
2766 instruct sqrtD_imm(regD dst, immD con) %{
2767   predicate(UseSSE>=2);
2768   match(Set dst (SqrtD con));
2769   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2770   ins_cost(150);
2771   ins_encode %{
2772     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
2773   %}
2774   ins_pipe(pipe_slow);
2775 %}
2776 
2777 // ====================VECTOR INSTRUCTIONS=====================================
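// Vector loads and stores, one rule per vector width:
//   vecS =  4 bytes, vecD = 8 bytes, vecX = 16 bytes (xmm),
//   vecY = 32 bytes (ymm), vecZ = 64 bytes (zmm, EVEX only).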
2778 
2779 // Load vectors (4 bytes long)
2780 instruct loadV4(vecS dst, memory mem) %{
2781   predicate(n->as_LoadVector()->memory_size() == 4);
2782   match(Set dst (LoadVector mem));
2783   ins_cost(125);
2784   format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
2785   ins_encode %{
2786     __ movdl($dst$$XMMRegister, $mem$$Address);
2787   %}
2788   ins_pipe( pipe_slow );
2789 %}
2790 
2791 // Load vectors (8 bytes long)
2792 instruct loadV8(vecD dst, memory mem) %{
2793   predicate(n->as_LoadVector()->memory_size() == 8);
2794   match(Set dst (LoadVector mem));
2795   ins_cost(125);
2796   format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
2797   ins_encode %{
2798     __ movq($dst$$XMMRegister, $mem$$Address);
2799   %}
2800   ins_pipe( pipe_slow );
2801 %}
2802 
2803 // Load vectors (16 bytes long)
2804 instruct loadV16(vecX dst, memory mem) %{
2805   predicate(n->as_LoadVector()->memory_size() == 16);
2806   match(Set dst (LoadVector mem));
2807   ins_cost(125);
2808   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
2809   ins_encode %{
2810     __ movdqu($dst$$XMMRegister, $mem$$Address);
2811   %}
2812   ins_pipe( pipe_slow );
2813 %}
2814 
2815 // Load vectors (32 bytes long)
2816 instruct loadV32(vecY dst, memory mem) %{
2817   predicate(n->as_LoadVector()->memory_size() == 32);
2818   match(Set dst (LoadVector mem));
2819   ins_cost(125);
2820   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
2821   ins_encode %{
2822     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
2823   %}
2824   ins_pipe( pipe_slow );
2825 %}
2826 
2827 // Load vectors (64 bytes long)
2828 instruct loadV64(vecZ dst, memory mem) %{
2829   predicate(n->as_LoadVector()->memory_size() == 64);
2830   match(Set dst (LoadVector mem));
2831   ins_cost(125);
2832   format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
2833   ins_encode %{
2834     int vector_len = 2;
2835     __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
2836   %}
2837   ins_pipe( pipe_slow );
2838 %}
2839 
2840 // Store vectors
2841 instruct storeV4(memory mem, vecS src) %{
2842   predicate(n->as_StoreVector()->memory_size() == 4);
2843   match(Set mem (StoreVector mem src));
2844   ins_cost(145);
2845   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
2846   ins_encode %{
2847     __ movdl($mem$$Address, $src$$XMMRegister);
2848   %}
2849   ins_pipe( pipe_slow );
2850 %}
2851 
2852 instruct storeV8(memory mem, vecD src) %{
2853   predicate(n->as_StoreVector()->memory_size() == 8);
2854   match(Set mem (StoreVector mem src));
2855   ins_cost(145);
2856   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
2857   ins_encode %{
2858     __ movq($mem$$Address, $src$$XMMRegister);
2859   %}
2860   ins_pipe( pipe_slow );
2861 %}
2862 
2863 instruct storeV16(memory mem, vecX src) %{
2864   predicate(n->as_StoreVector()->memory_size() == 16);
2865   match(Set mem (StoreVector mem src));
2866   ins_cost(145);
2867   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
2868   ins_encode %{
2869     __ movdqu($mem$$Address, $src$$XMMRegister);
2870   %}
2871   ins_pipe( pipe_slow );
2872 %}
2873 
2874 instruct storeV32(memory mem, vecY src) %{
2875   predicate(n->as_StoreVector()->memory_size() == 32);
2876   match(Set mem (StoreVector mem src));
2877   ins_cost(145);
2878   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
2879   ins_encode %{
2880     __ vmovdqu($mem$$Address, $src$$XMMRegister);
2881   %}
2882   ins_pipe( pipe_slow );
2883 %}
2884 
2885 instruct storeV64(memory mem, vecZ src) %{
2886   predicate(n->as_StoreVector()->memory_size() == 64);
2887   match(Set mem (StoreVector mem src));
2888   ins_cost(145);
2889   format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
2890   ins_encode %{
2891     int vector_len = 2;
2892     __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
2893   %}
2894   ins_pipe( pipe_slow );
2895 %}
2896 
2897 // ====================LEGACY REPLICATE=======================================
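// Replicate rules used on AVX hardware without the AVX-512 VL/VLBW extensions (note the
// !VM_Version::supports_avx512vl / supports_avx512vlbw terms in the predicates below);
// EVEX-capable machines are expected to match separate rules instead.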
2898 
2899 instruct Repl16B(vecX dst, rRegI src) %{
2900   predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
2901   match(Set dst (ReplicateB src));
2902   format %{ "movd    $dst,$src\n\t"
2903             "punpcklbw $dst,$dst\n\t"
2904             "pshuflw $dst,$dst,0x00\n\t"
2905             "punpcklqdq $dst,$dst\t! replicate16B" %}
2906   ins_encode %{
2907     __ movdl($dst$$XMMRegister, $src$$Register);
2908     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2909     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2910     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2911   %}
2912   ins_pipe( pipe_slow );
2913 %}
2914 
2915 instruct Repl16B_mem(vecX dst, memory mem) %{
2916   predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
2917   match(Set dst (ReplicateB (LoadB mem)));
2918   format %{ "punpcklbw $dst,$mem\n\t"
2919             "pshuflw $dst,$dst,0x00\n\t"
2920             "punpcklqdq $dst,$dst\t! replicate16B" %}
2921   ins_encode %{
2922     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
2923     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2924     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2925   %}
2926   ins_pipe( pipe_slow );
2927 %}
2928 
2929 instruct Repl32B(vecY dst, rRegI src) %{
2930   predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
2931   match(Set dst (ReplicateB src));
2932   format %{ "movd    $dst,$src\n\t"
2933             "punpcklbw $dst,$dst\n\t"
2934             "pshuflw $dst,$dst,0x00\n\t"
2935             "punpcklqdq $dst,$dst\n\t"
2936             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
2937   ins_encode %{
2938     __ movdl($dst$$XMMRegister, $src$$Register);
2939     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2940     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2941     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2942     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2943   %}
2944   ins_pipe( pipe_slow );
2945 %}
2946 
2947 instruct Repl32B_mem(vecY dst, memory mem) %{
2948   predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
2949   match(Set dst (ReplicateB (LoadB mem)));
2950   format %{ "punpcklbw $dst,$mem\n\t"
2951             "pshuflw $dst,$dst,0x00\n\t"
2952             "punpcklqdq $dst,$dst\n\t"
2953             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
2954   ins_encode %{
2955     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
2956     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2957     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2958     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2959   %}
2960   ins_pipe( pipe_slow );
2961 %}
2962 
2963 instruct Repl16B_imm(vecX dst, immI con) %{
2964   predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
2965   match(Set dst (ReplicateB con));
2966   format %{ "movq    $dst,[$constantaddress]\n\t"
2967             "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
2968   ins_encode %{
2969     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
2970     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2971   %}
2972   ins_pipe( pipe_slow );
2973 %}
2974 
2975 instruct Repl32B_imm(vecY dst, immI con) %{
2976   predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
2977   match(Set dst (ReplicateB con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
2981   ins_encode %{
2982     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
2983     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2984     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2985   %}
2986   ins_pipe( pipe_slow );
2987 %}
2988 
2989 instruct Repl16B_zero(vecX dst, immI0 zero) %{
2990   predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && UseAVX < 3);
2991   match(Set dst (ReplicateB zero));
2992   format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
2993   ins_encode %{
2994     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2995   %}
2996   ins_pipe( fpu_reg_reg );
2997 %}
2998 
2999 instruct Repl32B_zero(vecY dst, immI0 zero) %{
3000   predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && UseAVX < 3);
3001   match(Set dst (ReplicateB zero));
3002   format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
3003   ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX the MacroAssembler vpxor wrapper falls back to vxorpd.
3005     int vector_len = 1;
3006     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3007   %}
3008   ins_pipe( fpu_reg_reg );
3009 %}
3010 
3011 instruct Repl8S(vecX dst, rRegI src) %{
3012   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3013   match(Set dst (ReplicateS src));
3014   format %{ "movd    $dst,$src\n\t"
3015             "pshuflw $dst,$dst,0x00\n\t"
3016             "punpcklqdq $dst,$dst\t! replicate8S" %}
3017   ins_encode %{
3018     __ movdl($dst$$XMMRegister, $src$$Register);
3019     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3020     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3021   %}
3022   ins_pipe( pipe_slow );
3023 %}
3024 
3025 instruct Repl16S(vecY dst, rRegI src) %{
3026   predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3027   match(Set dst (ReplicateS src));
3028   format %{ "movd    $dst,$src\n\t"
3029             "pshuflw $dst,$dst,0x00\n\t"
3030             "punpcklqdq $dst,$dst\n\t"
3031             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
3032   ins_encode %{
3033     __ movdl($dst$$XMMRegister, $src$$Register);
3034     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3035     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3036     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3037   %}
3038   ins_pipe( pipe_slow );
3039 %}
3040 
3041 instruct Repl8S_imm(vecX dst, immI con) %{
3042   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3043   match(Set dst (ReplicateS con));
3044   format %{ "movq    $dst,[$constantaddress]\n\t"
3045             "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
3046   ins_encode %{
3047     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3048     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3049   %}
3050   ins_pipe( pipe_slow );
3051 %}
3052 
3053 instruct Repl16S_imm(vecY dst, immI con) %{
3054   predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3055   match(Set dst (ReplicateS con));
3056   format %{ "movq    $dst,[$constantaddress]\n\t"
3057             "punpcklqdq $dst,$dst\n\t"
3058             "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
3059   ins_encode %{
3060     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3061     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3062     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3063   %}
3064   ins_pipe( pipe_slow );
3065 %}
3066 
3067 instruct Repl8S_zero(vecX dst, immI0 zero) %{
3068   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3);
3069   match(Set dst (ReplicateS zero));
3070   format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
3071   ins_encode %{
3072     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3073   %}
3074   ins_pipe( fpu_reg_reg );
3075 %}
3076 
3077 instruct Repl16S_zero(vecY dst, immI0 zero) %{
3078   predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && UseAVX < 3);
3079   match(Set dst (ReplicateS zero));
3080   format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
3081   ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX the MacroAssembler vpxor wrapper falls back to vxorpd.
3083     int vector_len = 1;
3084     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3085   %}
3086   ins_pipe( fpu_reg_reg );
3087 %}
3088 
3089 instruct Repl4I(vecX dst, rRegI src) %{
3090   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3091   match(Set dst (ReplicateI src));
3092   format %{ "movd    $dst,$src\n\t"
3093             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
3094   ins_encode %{
3095     __ movdl($dst$$XMMRegister, $src$$Register);
3096     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3097   %}
3098   ins_pipe( pipe_slow );
3099 %}
3100 
3101 instruct Repl4I_mem(vecX dst, memory mem) %{
3102   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3103   match(Set dst (ReplicateI (LoadI mem)));
3104   format %{ "pshufd  $dst,$mem,0x00\t! replicate4I" %}
3105   ins_encode %{
3106     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3107   %}
3108   ins_pipe( pipe_slow );
3109 %}
3110 
3111 instruct Repl8I(vecY dst, rRegI src) %{
3112   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3113   match(Set dst (ReplicateI src));
3114   format %{ "movd    $dst,$src\n\t"
3115             "pshufd  $dst,$dst,0x00\n\t"
3116             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3117   ins_encode %{
3118     __ movdl($dst$$XMMRegister, $src$$Register);
3119     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3120     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3121   %}
3122   ins_pipe( pipe_slow );
3123 %}
3124 
3125 instruct Repl8I_mem(vecY dst, memory mem) %{
3126   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3127   match(Set dst (ReplicateI (LoadI mem)));
3128   format %{ "pshufd  $dst,$mem,0x00\n\t"
3129             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3130   ins_encode %{
3131     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3132     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3133   %}
3134   ins_pipe( pipe_slow );
3135 %}
3136 
3137 instruct Repl4I_imm(vecX dst, immI con) %{
3138   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3139   match(Set dst (ReplicateI con));
3140   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
3141             "punpcklqdq $dst,$dst" %}
3142   ins_encode %{
3143     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3144     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3145   %}
3146   ins_pipe( pipe_slow );
3147 %}
3148 
3149 instruct Repl8I_imm(vecY dst, immI con) %{
3150   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3151   match(Set dst (ReplicateI con));
3152   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
3153             "punpcklqdq $dst,$dst\n\t"
3154             "vinserti128h $dst,$dst,$dst" %}
3155   ins_encode %{
3156     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3157     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3158     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3159   %}
3160   ins_pipe( pipe_slow );
3161 %}
3162 
3163 instruct Repl4I_zero(vecX dst, immI0 zero) %{
3164   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && UseAVX < 3);
3165   match(Set dst (ReplicateI zero));
  format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
3167   ins_encode %{
3168     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3169   %}
3170   ins_pipe( fpu_reg_reg );
3171 %}
3172 
3173 instruct Repl8I_zero(vecY dst, immI0 zero) %{
3174   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3);
3175   match(Set dst (ReplicateI zero));
3176   format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
3177   ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX the MacroAssembler vpxor wrapper falls back to vxorpd.
3179     int vector_len = 1;
3180     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3181   %}
3182   ins_pipe( fpu_reg_reg );
3183 %}
3184 
3185 // Replicate long (8 byte) scalar to be vector
3186 #ifdef _LP64
3187 instruct Repl4L(vecY dst, rRegL src) %{
3188   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3189   match(Set dst (ReplicateL src));
3190   format %{ "movdq   $dst,$src\n\t"
3191             "punpcklqdq $dst,$dst\n\t"
3192             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3193   ins_encode %{
3194     __ movdq($dst$$XMMRegister, $src$$Register);
3195     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3196     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3197   %}
3198   ins_pipe( pipe_slow );
3199 %}
3200 #else // _LP64
3201 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
3202   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3203   match(Set dst (ReplicateL src));
3204   effect(TEMP dst, USE src, TEMP tmp);
3205   format %{ "movdl   $dst,$src.lo\n\t"
3206             "movdl   $tmp,$src.hi\n\t"
3207             "punpckldq $dst,$tmp\n\t"
3208             "punpcklqdq $dst,$dst\n\t"
3209             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3210   ins_encode %{
3211     __ movdl($dst$$XMMRegister, $src$$Register);
3212     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3213     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3214     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3215     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3216   %}
3217   ins_pipe( pipe_slow );
3218 %}
3219 #endif // _LP64
3220 
3221 instruct Repl4L_imm(vecY dst, immL con) %{
3222   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3223   match(Set dst (ReplicateL con));
3224   format %{ "movq    $dst,[$constantaddress]\n\t"
3225             "punpcklqdq $dst,$dst\n\t"
3226             "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
3227   ins_encode %{
3228     __ movq($dst$$XMMRegister, $constantaddress($con));
3229     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3230     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3231   %}
3232   ins_pipe( pipe_slow );
3233 %}
3234 
3235 instruct Repl4L_mem(vecY dst, memory mem) %{
3236   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3237   match(Set dst (ReplicateL (LoadL mem)));
3238   format %{ "movq    $dst,$mem\n\t"
3239             "punpcklqdq $dst,$dst\n\t"
3240             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3241   ins_encode %{
3242     __ movq($dst$$XMMRegister, $mem$$Address);
3243     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3244     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3245   %}
3246   ins_pipe( pipe_slow );
3247 %}
3248 
3249 instruct Repl8L_mem(vecZ dst, memory mem) %{
3250   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3);
3251   match(Set dst (ReplicateL (LoadL mem)));
3252   format %{ "movq    $dst,$mem\n\t"
3253             "punpcklqdq $dst,$dst\n\t"
3254             "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
3255             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
3256   ins_encode %{
3257     __ movq($dst$$XMMRegister, $mem$$Address);
3258     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3259     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3260     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3261   %}
3262   ins_pipe( pipe_slow );
3263 %}
3264 
3265 instruct Repl8F(vecY dst, regF src) %{
3266   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3267   match(Set dst (ReplicateF src));
3268   format %{ "pshufd  $dst,$src,0x00\n\t"
3269             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
3270   ins_encode %{
3271     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3272     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3273   %}
3274   ins_pipe( pipe_slow );
3275 %}
3276 
3277 instruct Repl8F_mem(vecY dst, memory mem) %{
3278   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3279   match(Set dst (ReplicateF (LoadF mem)));
3280   format %{ "pshufd  $dst,$mem,0x00\n\t"
3281             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
3282   ins_encode %{
3283     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3284     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3285   %}
3286   ins_pipe( pipe_slow );
3287 %}
3288 
3289 instruct Repl4D(vecY dst, regD src) %{
3290   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3291   match(Set dst (ReplicateD src));
3292   format %{ "pshufd  $dst,$src,0x44\n\t"
3293             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
3294   ins_encode %{
3295     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3296     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3297   %}
3298   ins_pipe( pipe_slow );
3299 %}
3300 
3301 instruct Repl4D_mem(vecY dst, memory mem) %{
3302   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
3303   match(Set dst (ReplicateD (LoadD mem)));
3304   format %{ "pshufd  $dst,$mem,0x44\n\t"
3305             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
3306   ins_encode %{
3307     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
3308     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3309   %}
3310   ins_pipe( pipe_slow );
3311 %}
3312 
3313 // ====================GENERIC REPLICATE==========================================
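// Replicate rules guarded only by vector length: they use baseline SSE2 shuffles (plus a
// 256-bit zeroing form) and apply at any SSE/AVX level.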
3314 
3315 // Replicate byte scalar to be vector
3316 instruct Repl4B(vecS dst, rRegI src) %{
3317   predicate(n->as_Vector()->length() == 4);
3318   match(Set dst (ReplicateB src));
3319   format %{ "movd    $dst,$src\n\t"
3320             "punpcklbw $dst,$dst\n\t"
3321             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
3322   ins_encode %{
3323     __ movdl($dst$$XMMRegister, $src$$Register);
3324     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3325     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3326   %}
3327   ins_pipe( pipe_slow );
3328 %}
3329 
3330 instruct Repl4B_mem(vecS dst, memory mem) %{
3331   predicate(n->as_Vector()->length() == 4);
3332   match(Set dst (ReplicateB (LoadB mem)));
3333   format %{ "punpcklbw $dst,$mem\n\t"
3334             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
3335   ins_encode %{
3336     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3337     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3338   %}
3339   ins_pipe( pipe_slow );
3340 %}
3341 
3342 instruct Repl8B(vecD dst, rRegI src) %{
3343   predicate(n->as_Vector()->length() == 8);
3344   match(Set dst (ReplicateB src));
3345   format %{ "movd    $dst,$src\n\t"
3346             "punpcklbw $dst,$dst\n\t"
3347             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
3348   ins_encode %{
3349     __ movdl($dst$$XMMRegister, $src$$Register);
3350     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3351     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3352   %}
3353   ins_pipe( pipe_slow );
3354 %}
3355 
3356 instruct Repl8B_mem(vecD dst, memory mem) %{
3357   predicate(n->as_Vector()->length() == 8);
3358   match(Set dst (ReplicateB (LoadB mem)));
3359   format %{ "punpcklbw $dst,$mem\n\t"
3360             "pshuflw $dst,$dst,0x00\t! replicate8B" %}
3361   ins_encode %{
3362     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3363     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3364   %}
3365   ins_pipe( pipe_slow );
3366 %}
3367 
3368 // Replicate byte scalar immediate to be vector by loading from const table.
3369 instruct Repl4B_imm(vecS dst, immI con) %{
3370   predicate(n->as_Vector()->length() == 4);
3371   match(Set dst (ReplicateB con));
3372   format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
3373   ins_encode %{
3374     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
3375   %}
3376   ins_pipe( pipe_slow );
3377 %}
3378 
3379 instruct Repl8B_imm(vecD dst, immI con) %{
3380   predicate(n->as_Vector()->length() == 8);
3381   match(Set dst (ReplicateB con));
3382   format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
3383   ins_encode %{
3384     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3385   %}
3386   ins_pipe( pipe_slow );
3387 %}
3388 
3389 // Replicate byte scalar zero to be vector
3390 instruct Repl4B_zero(vecS dst, immI0 zero) %{
3391   predicate(n->as_Vector()->length() == 4);
3392   match(Set dst (ReplicateB zero));
3393   format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
3394   ins_encode %{
3395     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3396   %}
3397   ins_pipe( fpu_reg_reg );
3398 %}
3399 
3400 instruct Repl8B_zero(vecD dst, immI0 zero) %{
3401   predicate(n->as_Vector()->length() == 8);
3402   match(Set dst (ReplicateB zero));
3403   format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
3404   ins_encode %{
3405     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3406   %}
3407   ins_pipe( fpu_reg_reg );
3408 %}
3409 
3410 // Replicate char/short (2 byte) scalar to be vector
3411 instruct Repl2S(vecS dst, rRegI src) %{
3412   predicate(n->as_Vector()->length() == 2);
3413   match(Set dst (ReplicateS src));
3414   format %{ "movd    $dst,$src\n\t"
3415             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
3416   ins_encode %{
3417     __ movdl($dst$$XMMRegister, $src$$Register);
3418     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3419   %}
3420   ins_pipe( fpu_reg_reg );
3421 %}
3422 
3423 instruct Repl4S(vecD dst, rRegI src) %{
3424   predicate(n->as_Vector()->length() == 4);
3425   match(Set dst (ReplicateS src));
3426   format %{ "movd    $dst,$src\n\t"
3427             "pshuflw $dst,$dst,0x00\t! replicate4S" %}
3428   ins_encode %{
3429     __ movdl($dst$$XMMRegister, $src$$Register);
3430     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3431   %}
3432   ins_pipe( fpu_reg_reg );
3433 %}
3434 
3435 instruct Repl4S_mem(vecD dst, memory mem) %{
3436   predicate(n->as_Vector()->length() == 4);
3437   match(Set dst (ReplicateS (LoadS mem)));
3438   format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
3439   ins_encode %{
3440     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3441   %}
3442   ins_pipe( fpu_reg_reg );
3443 %}
3444 
3445 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
3446 instruct Repl2S_imm(vecS dst, immI con) %{
3447   predicate(n->as_Vector()->length() == 2);
3448   match(Set dst (ReplicateS con));
3449   format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
3450   ins_encode %{
3451     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
3452   %}
3453   ins_pipe( fpu_reg_reg );
3454 %}
3455 
3456 instruct Repl4S_imm(vecD dst, immI con) %{
3457   predicate(n->as_Vector()->length() == 4);
3458   match(Set dst (ReplicateS con));
3459   format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
3460   ins_encode %{
3461     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3462   %}
3463   ins_pipe( fpu_reg_reg );
3464 %}
3465 
3466 // Replicate char/short (2 byte) scalar zero to be vector
3467 instruct Repl2S_zero(vecS dst, immI0 zero) %{
3468   predicate(n->as_Vector()->length() == 2);
3469   match(Set dst (ReplicateS zero));
3470   format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
3471   ins_encode %{
3472     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3473   %}
3474   ins_pipe( fpu_reg_reg );
3475 %}
3476 
3477 instruct Repl4S_zero(vecD dst, immI0 zero) %{
3478   predicate(n->as_Vector()->length() == 4);
3479   match(Set dst (ReplicateS zero));
3480   format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
3481   ins_encode %{
3482     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3483   %}
3484   ins_pipe( fpu_reg_reg );
3485 %}
3486 
3487 // Replicate integer (4 byte) scalar to be vector
3488 instruct Repl2I(vecD dst, rRegI src) %{
3489   predicate(n->as_Vector()->length() == 2);
3490   match(Set dst (ReplicateI src));
3491   format %{ "movd    $dst,$src\n\t"
3492             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3493   ins_encode %{
3494     __ movdl($dst$$XMMRegister, $src$$Register);
3495     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3496   %}
3497   ins_pipe( fpu_reg_reg );
3498 %}
3499 
// An integer can be loaded into an xmm register directly from memory.
3501 instruct Repl2I_mem(vecD dst, memory mem) %{
3502   predicate(n->as_Vector()->length() == 2);
3503   match(Set dst (ReplicateI (LoadI mem)));
3504   format %{ "movd    $dst,$mem\n\t"
3505             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3506   ins_encode %{
3507     __ movdl($dst$$XMMRegister, $mem$$Address);
3508     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3509   %}
3510   ins_pipe( fpu_reg_reg );
3511 %}
3512 
3513 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
3514 instruct Repl2I_imm(vecD dst, immI con) %{
3515   predicate(n->as_Vector()->length() == 2);
3516   match(Set dst (ReplicateI con));
3517   format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
3518   ins_encode %{
3519     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3520   %}
3521   ins_pipe( fpu_reg_reg );
3522 %}
3523 
3524 // Replicate integer (4 byte) scalar zero to be vector
3525 instruct Repl2I_zero(vecD dst, immI0 zero) %{
3526   predicate(n->as_Vector()->length() == 2);
3527   match(Set dst (ReplicateI zero));
  format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
3529   ins_encode %{
3530     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3531   %}
3532   ins_pipe( fpu_reg_reg );
3533 %}
3534 
3535 // Replicate long (8 byte) scalar to be vector
3536 #ifdef _LP64
3537 instruct Repl2L(vecX dst, rRegL src) %{
3538   predicate(n->as_Vector()->length() == 2);
3539   match(Set dst (ReplicateL src));
3540   format %{ "movdq   $dst,$src\n\t"
3541             "punpcklqdq $dst,$dst\t! replicate2L" %}
3542   ins_encode %{
3543     __ movdq($dst$$XMMRegister, $src$$Register);
3544     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3545   %}
3546   ins_pipe( pipe_slow );
3547 %}
3548 #else // _LP64
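     // On 32-bit VMs the long source is a register pair: the low and high halves are moved
     // into xmm separately (HIGH_FROM_LOW names the register holding the high half) and merged
     // with punpckldq before the quadword is duplicated.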
3549 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
3550   predicate(n->as_Vector()->length() == 2);
3551   match(Set dst (ReplicateL src));
3552   effect(TEMP dst, USE src, TEMP tmp);
3553   format %{ "movdl   $dst,$src.lo\n\t"
3554             "movdl   $tmp,$src.hi\n\t"
3555             "punpckldq $dst,$tmp\n\t"
3556             "punpcklqdq $dst,$dst\t! replicate2L"%}
3557   ins_encode %{
3558     __ movdl($dst$$XMMRegister, $src$$Register);
3559     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3560     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3561     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3562   %}
3563   ins_pipe( pipe_slow );
3564 %}
3565 #endif // _LP64
3566 
3567 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
3568 instruct Repl2L_imm(vecX dst, immL con) %{
3569   predicate(n->as_Vector()->length() == 2);
3570   match(Set dst (ReplicateL con));
3571   format %{ "movq    $dst,[$constantaddress]\n\t"
3572             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
3573   ins_encode %{
3574     __ movq($dst$$XMMRegister, $constantaddress($con));
3575     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3576   %}
3577   ins_pipe( pipe_slow );
3578 %}
3579 
3580 // A long can be loaded into an xmm register directly from memory.
3581 instruct Repl2L_mem(vecX dst, memory mem) %{
3582   predicate(n->as_Vector()->length() == 2);
3583   match(Set dst (ReplicateL (LoadL mem)));
3584   format %{ "movq    $dst,$mem\n\t"
3585             "punpcklqdq $dst,$dst\t! replicate2L" %}
3586   ins_encode %{
3587     __ movq($dst$$XMMRegister, $mem$$Address);
3588     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3589   %}
3590   ins_pipe( pipe_slow );
3591 %}
3592 
3593 // Replicate long (8 byte) scalar zero to be vector
3594 instruct Repl2L_zero(vecX dst, immL0 zero) %{
3595   predicate(n->as_Vector()->length() == 2);
3596   match(Set dst (ReplicateL zero));
3597   format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
3598   ins_encode %{
3599     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3600   %}
3601   ins_pipe( fpu_reg_reg );
3602 %}
3603 
3604 instruct Repl4L_zero(vecY dst, immL0 zero) %{
3605   predicate(n->as_Vector()->length() == 4);
3606   match(Set dst (ReplicateL zero));
3607   format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
3608   ins_encode %{
3609     // vpxor with a 256-bit operand requires AVX2.
3610     int vector_len = 1;
3611     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3612   %}
3613   ins_pipe( fpu_reg_reg );
3614 %}
3615 
3616 // Replicate float (4 byte) scalar to be vector
3617 instruct Repl2F(vecD dst, regF src) %{
3618   predicate(n->as_Vector()->length() == 2);
3619   match(Set dst (ReplicateF src));
3620   format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
3621   ins_encode %{
3622     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3623   %}
3624   ins_pipe( fpu_reg_reg );
3625 %}
3626 
3627 instruct Repl2F_mem(vecD dst, memory mem) %{
3628   predicate(n->as_Vector()->length() == 2);
3629   match(Set dst (ReplicateF (LoadF mem)));
3630   format %{ "pshufd  $dst,$mem,0x00\t! replicate2F" %}
3631   ins_encode %{
3632     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3633   %}
3634   ins_pipe( pipe_slow );
3635 %}
3636 
3637 instruct Repl4F(vecX dst, regF src) %{
3638   predicate(n->as_Vector()->length() == 4);
3639   match(Set dst (ReplicateF src));
3640   format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
3641   ins_encode %{
3642     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3643   %}
3644   ins_pipe( pipe_slow );
3645 %}
3646 
3647 instruct Repl4F_mem(vecX dst, memory mem) %{
3648   predicate(n->as_Vector()->length() == 4);
3649   match(Set dst (ReplicateF (LoadF mem)));
3650   format %{ "pshufd  $dst,$mem,0x00\t! replicate4F" %}
3651   ins_encode %{
3652     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
3653   %}
3654   ins_pipe( pipe_slow );
3655 %}
3656 
3657 // Replicate float (4 byte) scalar zero to be vector
3658 instruct Repl2F_zero(vecD dst, immF0 zero) %{
3659   predicate(n->as_Vector()->length() == 2);
3660   match(Set dst (ReplicateF zero));
3661   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
3662   ins_encode %{
3663     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3664   %}
3665   ins_pipe( fpu_reg_reg );
3666 %}
3667 
3668 instruct Repl4F_zero(vecX dst, immF0 zero) %{
3669   predicate(n->as_Vector()->length() == 4);
3670   match(Set dst (ReplicateF zero));
3671   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
3672   ins_encode %{
3673     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3674   %}
3675   ins_pipe( fpu_reg_reg );
3676 %}
3677 
3678 instruct Repl8F_zero(vecY dst, immF0 zero) %{
3679   predicate(n->as_Vector()->length() == 8);
3680   match(Set dst (ReplicateF zero));
3681   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
3682   ins_encode %{
3683     int vector_len = 1;
3684     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3685   %}
3686   ins_pipe( fpu_reg_reg );
3687 %}
3688 
3689 // Replicate double (8 byte) scalar to be vector
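     // (pshufd with immediate 0x44 selects dwords 1:0 for both halves of the destination,
     // duplicating the low 64-bit lane.)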
3690 instruct Repl2D(vecX dst, regD src) %{
3691   predicate(n->as_Vector()->length() == 2);
3692   match(Set dst (ReplicateD src));
3693   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
3694   ins_encode %{
3695     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3696   %}
3697   ins_pipe( pipe_slow );
3698 %}
3699 
3700 instruct Repl2D_mem(vecX dst, memory mem) %{
3701   predicate(n->as_Vector()->length() == 2);
3702   match(Set dst (ReplicateD (LoadD mem)));
3703   format %{ "pshufd  $dst,$mem,0x44\t! replicate2D" %}
3704   ins_encode %{
3705     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
3706   %}
3707   ins_pipe( pipe_slow );
3708 %}
3709 
3710 // Replicate double (8 byte) scalar zero to be vector
3711 instruct Repl2D_zero(vecX dst, immD0 zero) %{
3712   predicate(n->as_Vector()->length() == 2);
3713   match(Set dst (ReplicateD zero));
3714   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
3715   ins_encode %{
3716     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
3717   %}
3718   ins_pipe( fpu_reg_reg );
3719 %}
3720 
3721 instruct Repl4D_zero(vecY dst, immD0 zero) %{
3722   predicate(n->as_Vector()->length() == 4);
3723   match(Set dst (ReplicateD zero));
3724   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
3725   ins_encode %{
3726     int vector_len = 1;
3727     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3728   %}
3729   ins_pipe( fpu_reg_reg );
3730 %}
3731 
3732 // ====================EVEX REPLICATE=============================================
3733 
3734 // Note: some of the legacy forms are applicable to EVEX
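     // The vector_len argument passed to the assembler below selects the operating length:
     // 0 = 128-bit, 1 = 256-bit, 2 = 512-bit (presumably matching the assembler's
     // AVX_128bit/AVX_256bit/AVX_512bit encodings).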
3735 
3736 instruct Repl16B_evex(vecX dst, rRegI src) %{
3737   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3738   match(Set dst (ReplicateB src));
3739   format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
3740   ins_encode %{
3741     int vector_len = 0;
3742     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
3743   %}
3744   ins_pipe( pipe_slow );
3745 %}
3746 
3747 instruct Repl16B_mem_evex(vecX dst, memory mem) %{
3748   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3749   match(Set dst (ReplicateB (LoadB mem)));
3750   format %{ "vpbroadcastb  $dst,$mem\t! replicate16B" %}
3751   ins_encode %{
3752     int vector_len = 0;
3753     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3754   %}
3755   ins_pipe( pipe_slow );
3756 %}
3757 
3758 instruct Repl32B_evex(vecY dst, rRegI src) %{
3759   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
3760   match(Set dst (ReplicateB src));
3761   format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
3762   ins_encode %{
3763     int vector_len = 1;
3764     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
3765   %}
3766   ins_pipe( pipe_slow );
3767 %}
3768 
3769 instruct Repl32B_mem_evex(vecY dst, memory mem) %{
3770   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
3771   match(Set dst (ReplicateB (LoadB mem)));
3772   format %{ "vpbroadcastb  $dst,$mem\t! replicate32B" %}
3773   ins_encode %{
3774     int vector_len = 1;
3775     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3776   %}
3777   ins_pipe( pipe_slow );
3778 %}
3779 
3780 instruct Repl64B_evex(vecZ dst, rRegI src) %{
3781   predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
3782   match(Set dst (ReplicateB src));
3783   format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
3784   ins_encode %{
3785     int vector_len = 2;
3786     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
3787   %}
3788   ins_pipe( pipe_slow );
3789 %}
3790 
3791 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
3792   predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw());
3793   match(Set dst (ReplicateB (LoadB mem)));
3794   format %{ "vpbroadcastb  $dst,$mem\t! replicate64B" %}
3795   ins_encode %{
3796     int vector_len = 2;
3797     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3798   %}
3799   ins_pipe( pipe_slow );
3800 %}
3801 
3802 instruct Repl16B_imm_evex(vecX dst, immI con) %{
3803   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3804   match(Set dst (ReplicateB con));
3805   format %{ "movq    $dst,[$constantaddress]\n\t"
3806             "vpbroadcastb $dst,$dst\t! replicate16B" %}
3807   ins_encode %{
3808     int vector_len = 0;
3809     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3810     __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3811   %}
3812   ins_pipe( pipe_slow );
3813 %}
3814 
3815 instruct Repl32B_imm_evex(vecY dst, immI con) %{
3816   predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
3817   match(Set dst (ReplicateB con));
3818   format %{ "movq    $dst,[$constantaddress]\n\t"
3819             "vpbroadcastb $dst,$dst\t! replicate32B" %}
3820   ins_encode %{
3821     int vector_len = 1;
3822     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3823     __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3824   %}
3825   ins_pipe( pipe_slow );
3826 %}
3827 
3828 instruct Repl64B_imm_evex(vecZ dst, immI con) %{
3829   predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
3830   match(Set dst (ReplicateB con));
3831   format %{ "movq    $dst,[$constantaddress]\n\t"
3832             "vpbroadcastb $dst,$dst\t! upper replicate64B" %}
3833   ins_encode %{
3834     int vector_len = 2;
3835     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3836     __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3837   %}
3838   ins_pipe( pipe_slow );
3840 %}
3841 
3842 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
3843   predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
3844   match(Set dst (ReplicateB zero));
3845   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate64B zero" %}
3846   ins_encode %{
3847     // 512-bit vpxor is available with EVEX (AVX-512), guarded by UseAVX > 2.
3848     int vector_len = 2;
3849     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3850   %}
3851   ins_pipe( fpu_reg_reg );
3852 %}
3853 
3854 instruct Repl8S_evex(vecX dst, rRegI src) %{
3855   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
3856   match(Set dst (ReplicateS src));
3857   format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
3858   ins_encode %{
3859     int vector_len = 0;
3860     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
3861   %}
3862   ins_pipe( pipe_slow );
3863 %}
3864 
3865 instruct Repl8S_mem_evex(vecX dst, memory mem) %{
3866   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
3867   match(Set dst (ReplicateS (LoadS mem)));
3868   format %{ "vpbroadcastw  $dst,$mem\t! replicate8S" %}
3869   ins_encode %{
3870     int vector_len = 0;
3871     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
3872   %}
3873   ins_pipe( pipe_slow );
3874 %}
3875 
3876 instruct Repl16S_evex(vecY dst, rRegI src) %{
3877   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3878   match(Set dst (ReplicateS src));
3879   format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
3880   ins_encode %{
3881     int vector_len = 1;
3882     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
3883   %}
3884   ins_pipe( pipe_slow );
3885 %}
3886 
3887 instruct Repl16S_mem_evex(vecY dst, memory mem) %{
3888   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3889   match(Set dst (ReplicateS (LoadS mem)));
3890   format %{ "vpbroadcastw  $dst,$mem\t! replicate16S" %}
3891   ins_encode %{
3892     int vector_len = 1;
3893     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
3894   %}
3895   ins_pipe( pipe_slow );
3896 %}
3897 
3898 instruct Repl32S_evex(vecZ dst, rRegI src) %{
3899   predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
3900   match(Set dst (ReplicateS src));
3901   format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
3902   ins_encode %{
3903     int vector_len = 2;
3904     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
3905   %}
3906   ins_pipe( pipe_slow );
3907 %}
3908 
3909 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
3910   predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
3911   match(Set dst (ReplicateS (LoadS mem)));
3912   format %{ "vpbroadcastw  $dst,$mem\t! replicate32S" %}
3913   ins_encode %{
3914     int vector_len = 2;
3915     __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
3916   %}
3917   ins_pipe( pipe_slow );
3918 %}
3919 
3920 instruct Repl8S_imm_evex(vecX dst, immI con) %{
3921   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
3922   match(Set dst (ReplicateS con));
3923   format %{ "movq    $dst,[$constantaddress]\n\t"
3924             "vpbroadcastw $dst,$dst\t! replicate8S" %}
3925   ins_encode %{
3926     int vector_len = 0;
3927     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3928     __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3929   %}
3930   ins_pipe( pipe_slow );
3931 %}
3932 
3933 instruct Repl16S_imm_evex(vecY dst, immI con) %{
3934   predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
3935   match(Set dst (ReplicateS con));
3936   format %{ "movq    $dst,[$constantaddress]\n\t"
3937             "vpbroadcastw $dst,$dst\t! replicate16S" %}
3938   ins_encode %{
3939     int vector_len = 1;
3940     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3941     __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3942   %}
3943   ins_pipe( pipe_slow );
3944 %}
3945 
3946 instruct Repl32S_imm_evex(vecZ dst, immI con) %{
3947   predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
3948   match(Set dst (ReplicateS con));
3949   format %{ "movq    $dst,[$constantaddress]\n\t"
3950             "vpbroadcastw $dst,$dst\t! replicate32S" %}
3951   ins_encode %{
3952     int vector_len = 2;
3953     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3954     __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3955   %}
3956   ins_pipe( pipe_slow );
3957 %}
3958 
3959 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
3960   predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
3961   match(Set dst (ReplicateS zero));
3962   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate32S zero" %}
3963   ins_encode %{
3964     // 512-bit vpxor is available with EVEX (AVX-512), guarded by UseAVX > 2.
3965     int vector_len = 2;
3966     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3967   %}
3968   ins_pipe( fpu_reg_reg );
3969 %}
3970 
3971 instruct Repl4I_evex(vecX dst, rRegI src) %{
3972   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
3973   match(Set dst (ReplicateI src));
3974   format %{ "vpbroadcastd  $dst,$src\t! replicate4I" %}
3975   ins_encode %{
3976     int vector_len = 0;
3977     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
3978   %}
3979   ins_pipe( pipe_slow );
3980 %}
3981 
3982 instruct Repl4I_mem_evex(vecX dst, memory mem) %{
3983   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
3984   match(Set dst (ReplicateI (LoadI mem)));
3985   format %{ "vpbroadcastd  $dst,$mem\t! replicate4I" %}
3986   ins_encode %{
3987     int vector_len = 0;
3988     __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
3989   %}
3990   ins_pipe( pipe_slow );
3991 %}
3992 
3993 instruct Repl8I_evex(vecY dst, rRegI src) %{
3994   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
3995   match(Set dst (ReplicateI src));
3996   format %{ "vpbroadcastd  $dst,$src\t! replicate8I" %}
3997   ins_encode %{
3998     int vector_len = 1;
3999     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
4000   %}
4001   ins_pipe( pipe_slow );
4002 %}
4003 
4004 instruct Repl8I_mem_evex(vecY dst, memory mem) %{
4005   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4006   match(Set dst (ReplicateI (LoadI mem)));
4007   format %{ "vpbroadcastd  $dst,$mem\t! replicate8I" %}
4008   ins_encode %{
4009     int vector_len = 1;
4010     __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
4011   %}
4012   ins_pipe( pipe_slow );
4013 %}
4014 
4015 instruct Repl16I_evex(vecZ dst, rRegI src) %{
4016   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4017   match(Set dst (ReplicateI src));
4018   format %{ "vpbroadcastd  $dst,$src\t! replicate16I" %}
4019   ins_encode %{
4020     int vector_len = 2;
4021     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
4022   %}
4023   ins_pipe( pipe_slow );
4024 %}
4025 
4026 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
4027   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4028   match(Set dst (ReplicateI (LoadI mem)));
4029   format %{ "vpbroadcastd  $dst,$mem\t! replicate16I" %}
4030   ins_encode %{
4031     int vector_len = 2;
4032     __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
4033   %}
4034   ins_pipe( pipe_slow );
4035 %}
4036 
4037 instruct Repl4I_imm_evex(vecX dst, immI con) %{
4038   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4039   match(Set dst (ReplicateI con));
4040   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
4041             "vpbroadcastd  $dst,$dst\t! replicate4I" %}
4042   ins_encode %{
4043     int vector_len = 0;
4044     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4045     __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4046   %}
4047   ins_pipe( pipe_slow );
4048 %}
4049 
4050 instruct Repl8I_imm_evex(vecY dst, immI con) %{
4051   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4052   match(Set dst (ReplicateI con));
4053   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
4054             "vpbroadcastd  $dst,$dst\t! replicate8I" %}
4055   ins_encode %{
4056     int vector_len = 1;
4057     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4058     __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4059   %}
4060   ins_pipe( pipe_slow );
4061 %}
4062 
4063 instruct Repl16I_imm_evex(vecZ dst, immI con) %{
4064   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4065   match(Set dst (ReplicateI con));
4066   format %{ "movq    $dst,[$constantaddress]\t! replicate16I($con)\n\t"
4067             "vpbroadcastd  $dst,$dst\t! replicate16I" %}
4068   ins_encode %{
4069     int vector_len = 2;
4070     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4071     __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4072   %}
4073   ins_pipe( pipe_slow );
4074 %}
4075 
4076 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
4077   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4078   match(Set dst (ReplicateI zero));
4079   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate16I zero" %}
4080   ins_encode %{
4081     // 512-bit vpxor is available with EVEX (AVX-512), guarded by UseAVX > 2.
4082     int vector_len = 2;
4083     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4084   %}
4085   ins_pipe( fpu_reg_reg );
4086 %}
4087 
4088 // Replicate long (8 byte) scalar to be vector
4089 #ifdef _LP64
4090 instruct Repl4L_evex(vecY dst, rRegL src) %{
4091   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4092   match(Set dst (ReplicateL src));
4093   format %{ "vpbroadcastq  $dst,$src\t! replicate4L" %}
4094   ins_encode %{
4095     int vector_len = 1;
4096     __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
4097   %}
4098   ins_pipe( pipe_slow );
4099 %}
4100 
4101 instruct Repl8L_evex(vecZ dst, rRegL src) %{
4102   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4103   match(Set dst (ReplicateL src));
4104   format %{ "vpbroadcastq  $dst,$src\t! replicate8L" %}
4105   ins_encode %{
4106     int vector_len = 2;
4107     __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
4108   %}
4109   ins_pipe( pipe_slow );
4110 %}
4111 #else // _LP64
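     // As with Repl2L above, the 32-bit variants first assemble the 64-bit value from the
     // register pair before broadcasting it.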
4112 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
4113   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4114   match(Set dst (ReplicateL src));
4115   effect(TEMP dst, USE src, TEMP tmp);
4116   format %{ "movdl   $dst,$src.lo\n\t"
4117             "movdl   $tmp,$src.hi\n\t"
4118             "punpckldq $dst,$tmp\n\t"
4119             "vpbroadcastq  $dst,$dst\t! replicate4L" %}
4120   ins_encode %{
4121     int vector_len = 1;
4122     __ movdl($dst$$XMMRegister, $src$$Register);
4123     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4124     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4125     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4126   %}
4127   ins_pipe( pipe_slow );
4128 %}
4129 
4130 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
4131   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4132   match(Set dst (ReplicateL src));
4133   effect(TEMP dst, USE src, TEMP tmp);
4134   format %{ "movdl   $dst,$src.lo\n\t"
4135             "movdl   $tmp,$src.hi\n\t"
4136             "punpckldq $dst,$tmp\n\t"
4137             "vpbroadcastq  $dst,$dst\t! replicate8L" %}
4138   ins_encode %{
4139     int vector_len = 2;
4140     __ movdl($dst$$XMMRegister, $src$$Register);
4141     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4142     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4143     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4144   %}
4145   ins_pipe( pipe_slow );
4146 %}
4147 #endif // _LP64
4148 
4149 instruct Repl4L_imm_evex(vecY dst, immL con) %{
4150   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4151   match(Set dst (ReplicateL con));
4152   format %{ "movq    $dst,[$constantaddress]\n\t"
4153             "vpbroadcastq  $dst,$dst\t! replicate4L" %}
4154   ins_encode %{
4155     int vector_len = 1;
4156     __ movq($dst$$XMMRegister, $constantaddress($con));
4157     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4158   %}
4159   ins_pipe( pipe_slow );
4160 %}
4161 
4162 instruct Repl8L_imm_evex(vecZ dst, immL con) %{
4163   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4164   match(Set dst (ReplicateL con));
4165   format %{ "movq    $dst,[$constantaddress]\n\t"
4166             "vpbroadcastq  $dst,$dst\t! replicate8L" %}
4167   ins_encode %{
4168     int vector_len = 2;
4169     __ movq($dst$$XMMRegister, $constantaddress($con));
4170     __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4171   %}
4172   ins_pipe( pipe_slow );
4173 %}
4174 
4175 instruct Repl4L_mem_evex(vecY dst, memory mem) %{
4176   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4177   match(Set dst (ReplicateL (LoadL mem)));
4178   format %{ "vpbroadcastq  $dst,$mem\t! replicate4L" %}
4179   ins_encode %{
4180     int vector_len = 1;
4181     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
4182   %}
4183   ins_pipe( pipe_slow );
4184 %}
4185 
4186 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
4187   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4188   match(Set dst (ReplicateL (LoadL mem)));
4189   format %{ "vpbroadcastq  $dst,$mem\t! replicate8L" %}
4190   ins_encode %{
4191     int vector_len = 2;
4192     __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
4193   %}
4194   ins_pipe( pipe_slow );
4195 %}
4196 
4197 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
4198   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4199   match(Set dst (ReplicateL zero));
4200   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate8L zero" %}
4201   ins_encode %{
4202     // 512-bit vpxor is available with EVEX (AVX-512), guarded by UseAVX > 2.
4203     int vector_len = 2;
4204     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4205   %}
4206   ins_pipe( fpu_reg_reg );
4207 %}
4208 
4209 instruct Repl8F_evex(vecY dst, regF src) %{
4210   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4211   match(Set dst (ReplicateF src));
4212   format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
4213   ins_encode %{
4214     int vector_len = 1;
4215     __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4216   %}
4217   ins_pipe( pipe_slow );
4218 %}
4219 
4220 instruct Repl8F_mem_evex(vecY dst, memory mem) %{
4221   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
4222   match(Set dst (ReplicateF (LoadF mem)));
4223   format %{ "vbroadcastss  $dst,$mem\t! replicate8F" %}
4224   ins_encode %{
4225     int vector_len = 1;
4226     __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
4227   %}
4228   ins_pipe( pipe_slow );
4229 %}
4230 
4231 instruct Repl16F_evex(vecZ dst, regF src) %{
4232   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4233   match(Set dst (ReplicateF src));
4234   format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
4235   ins_encode %{
4236     int vector_len = 2;
4237     __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4238   %}
4239   ins_pipe( pipe_slow );
4240 %}
4241 
4242 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
4243   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4244   match(Set dst (ReplicateF (LoadF mem)));
4245   format %{ "vbroadcastss  $dst,$mem\t! replicate16F" %}
4246   ins_encode %{
4247     int vector_len = 2;
4248     __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
4249   %}
4250   ins_pipe( pipe_slow );
4251 %}
4252 
4253 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
4254   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
4255   match(Set dst (ReplicateF zero));
4256   format %{ "vxorps  $dst k0,$dst,$dst\t! replicate16F zero" %}
4257   ins_encode %{
4258     int vector_len = 2;
4259     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4260   %}
4261   ins_pipe( fpu_reg_reg );
4262 %}
4263 
4264 instruct Repl4D_evex(vecY dst, regD src) %{
4265   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4266   match(Set dst (ReplicateD src));
4267   format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
4268   ins_encode %{
4269     int vector_len = 1;
4270     __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4271   %}
4272   ins_pipe( pipe_slow );
4273 %}
4274 
4275 instruct Repl4D_mem_evex(vecY dst, memory mem) %{
4276   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
4277   match(Set dst (ReplicateD (LoadD mem)));
4278   format %{ "vbroadcastsd  $dst,$mem\t! replicate4D" %}
4279   ins_encode %{
4280     int vector_len = 1;
4281     __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
4282   %}
4283   ins_pipe( pipe_slow );
4284 %}
4285 
4286 instruct Repl8D_evex(vecZ dst, regD src) %{
4287   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4288   match(Set dst (ReplicateD src));
4289   format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
4290   ins_encode %{
4291     int vector_len = 2;
4292     __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4293   %}
4294   ins_pipe( pipe_slow );
4295 %}
4296 
4297 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
4298   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4299   match(Set dst (ReplicateD (LoadD mem)));
4300   format %{ "vbroadcastsd  $dst,$mem\t! replicate8D" %}
4301   ins_encode %{
4302     int vector_len = 2;
4303     __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
4304   %}
4305   ins_pipe( pipe_slow );
4306 %}
4307 
4308 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
4309   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
4310   match(Set dst (ReplicateD zero));
4311   format %{ "vxorpd  $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
4312   ins_encode %{
4313     int vector_len = 2;
4314     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4315   %}
4316   ins_pipe( fpu_reg_reg );
4317 %}
4318 
4319 // ====================REDUCTION ARITHMETIC=======================================
4320 
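     // The reduction forms below fold the upper lanes of src2 into the low lane (via
     // horizontal adds, 128/256-bit lane extracts and pshufd moves such as 0xE = dwords 3:2
     // and 0x1 = dword 1), combine the partial result with the scalar src1, and move the
     // final value back into the destination register.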
4321 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4322   predicate(UseSSE > 2 && UseAVX == 0);
4323   match(Set dst (AddReductionVI src1 src2));
4324   effect(TEMP tmp2, TEMP tmp);
4325   format %{ "movdqu  $tmp2,$src2\n\t"
4326             "phaddd  $tmp2,$tmp2\n\t"
4327             "movd    $tmp,$src1\n\t"
4328             "paddd   $tmp,$tmp2\n\t"
4329             "movd    $dst,$tmp\t! add reduction2I" %}
4330   ins_encode %{
4331     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
4332     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4333     __ movdl($tmp$$XMMRegister, $src1$$Register);
4334     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
4335     __ movdl($dst$$Register, $tmp$$XMMRegister);
4336   %}
4337   ins_pipe( pipe_slow );
4338 %}
4339 
4340 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4341   predicate(UseAVX > 0 && UseAVX < 3);
4342   match(Set dst (AddReductionVI src1 src2));
4343   effect(TEMP tmp, TEMP tmp2);
4344   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4345             "movd     $tmp2,$src1\n\t"
4346             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4347             "movd     $dst,$tmp2\t! add reduction2I" %}
4348   ins_encode %{
4349     int vector_len = 0;
4350     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4351     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4352     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4353     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4354   %}
4355   ins_pipe( pipe_slow );
4356 %}
4357 
4358 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4359   predicate(UseAVX > 2);
4360   match(Set dst (AddReductionVI src1 src2));
4361   effect(TEMP tmp, TEMP tmp2);
4362   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
4363             "vpaddd  $tmp,$src2,$tmp2\n\t"
4364             "movd    $tmp2,$src1\n\t"
4365             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4366             "movd    $dst,$tmp2\t! add reduction2I" %}
4367   ins_encode %{
4368     int vector_len = 0;
4369     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4370     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4371     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4372     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4373     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4374   %}
4375   ins_pipe( pipe_slow );
4376 %}
4377 
4378 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4379   predicate(UseSSE > 2 && UseAVX == 0);
4380   match(Set dst (AddReductionVI src1 src2));
4381   effect(TEMP tmp2, TEMP tmp);
4382   format %{ "movdqu  $tmp2,$src2\n\t"
4383             "phaddd  $tmp2,$tmp2\n\t"
4384             "phaddd  $tmp2,$tmp2\n\t"
4385             "movd    $tmp,$src1\n\t"
4386             "paddd   $tmp,$tmp2\n\t"
4387             "movd    $dst,$tmp\t! add reduction4I" %}
4388   ins_encode %{
4389     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
4390     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4391     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4392     __ movdl($tmp$$XMMRegister, $src1$$Register);
4393     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
4394     __ movdl($dst$$Register, $tmp$$XMMRegister);
4395   %}
4396   ins_pipe( pipe_slow );
4397 %}
4398 
4399 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4400   predicate(UseAVX > 0 && UseAVX < 3);
4401   match(Set dst (AddReductionVI src1 src2));
4402   effect(TEMP tmp, TEMP tmp2);
4403   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4404             "vphaddd  $tmp,$tmp,$tmp2\n\t"
4405             "movd     $tmp2,$src1\n\t"
4406             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4407             "movd     $dst,$tmp2\t! add reduction4I" %}
4408   ins_encode %{
4409     int vector_len = 0;
4410     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4411     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4412     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4413     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
4414     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4415   %}
4416   ins_pipe( pipe_slow );
4417 %}
4418 
4419 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4420   predicate(UseAVX > 2);
4421   match(Set dst (AddReductionVI src1 src2));
4422   effect(TEMP tmp, TEMP tmp2);
4423   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4424             "vpaddd  $tmp,$src2,$tmp2\n\t"
4425             "pshufd  $tmp2,$tmp,0x1\n\t"
4426             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4427             "movd    $tmp2,$src1\n\t"
4428             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4429             "movd    $dst,$tmp2\t! add reduction4I" %}
4430   ins_encode %{
4431     int vector_len = 0;
4432     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4433     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4434     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4435     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4436     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4437     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4438     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4439   %}
4440   ins_pipe( pipe_slow );
4441 %}
4442 
4443 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4444   predicate(UseAVX > 0 && UseAVX < 3);
4445   match(Set dst (AddReductionVI src1 src2));
4446   effect(TEMP tmp, TEMP tmp2);
4447   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4448             "vphaddd  $tmp,$tmp,$tmp2\n\t"
4449             "vextracti128  $tmp2,$tmp\n\t"
4450             "vpaddd   $tmp,$tmp,$tmp2\n\t"
4451             "movd     $tmp2,$src1\n\t"
4452             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4453             "movd     $dst,$tmp2\t! add reduction8I" %}
4454   ins_encode %{
4455     int vector_len = 1;
4456     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4457     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4458     __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
4459     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4460     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4461     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4462     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4463   %}
4464   ins_pipe( pipe_slow );
4465 %}
4466 
4467 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4468   predicate(UseAVX > 2);
4469   match(Set dst (AddReductionVI src1 src2));
4470   effect(TEMP tmp, TEMP tmp2);
4471   format %{ "vextracti128  $tmp,$src2\n\t"
4472             "vpaddd  $tmp,$tmp,$src2\n\t"
4473             "pshufd  $tmp2,$tmp,0xE\n\t"
4474             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4475             "pshufd  $tmp2,$tmp,0x1\n\t"
4476             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4477             "movd    $tmp2,$src1\n\t"
4478             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4479             "movd    $dst,$tmp2\t! add reduction8I" %}
4480   ins_encode %{
4481     int vector_len = 0;
4482     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
4483     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4484     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4485     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4486     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4487     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4488     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4489     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4490     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4491   %}
4492   ins_pipe( pipe_slow );
4493 %}
4494 
4495 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4496   predicate(UseAVX > 2);
4497   match(Set dst (AddReductionVI src1 src2));
4498   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4499   format %{ "vextracti64x4  $tmp3,$src2\n\t"
4500             "vpaddd  $tmp3,$tmp3,$src2\n\t"
4501             "vextracti128   $tmp,$tmp3\n\t"
4502             "vpaddd  $tmp,$tmp,$tmp3\n\t"
4503             "pshufd  $tmp2,$tmp,0xE\n\t"
4504             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4505             "pshufd  $tmp2,$tmp,0x1\n\t"
4506             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4507             "movd    $tmp2,$src1\n\t"
4508             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4509             "movd    $dst,$tmp2\t! add reduction16I" %}
4510   ins_encode %{
4511     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
4512     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
4513     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
4514     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
4515     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4516     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4517     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4518     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4519     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4520     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4521     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4522   %}
4523   ins_pipe( pipe_slow );
4524 %}
4525 
4526 #ifdef _LP64
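     // Long reductions move values between 64-bit general registers and xmm with movdq,
     // which needs a REX.W encoding and is therefore only available on LP64.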
4527 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
4528   predicate(UseAVX > 2);
4529   match(Set dst (AddReductionVL src1 src2));
4530   effect(TEMP tmp, TEMP tmp2);
4531   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4532             "vpaddq  $tmp,$src2,$tmp2\n\t"
4533             "movdq   $tmp2,$src1\n\t"
4534             "vpaddq  $tmp2,$tmp,$tmp2\n\t"
4535             "movdq   $dst,$tmp2\t! add reduction2L" %}
4536   ins_encode %{
4537     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4538     __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
4539     __ movdq($tmp2$$XMMRegister, $src1$$Register);
4540     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4541     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4542   %}
4543   ins_pipe( pipe_slow );
4544 %}
4545 
4546 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
4547   predicate(UseAVX > 2);
4548   match(Set dst (AddReductionVL src1 src2));
4549   effect(TEMP tmp, TEMP tmp2);
4550   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
4551             "vpaddq  $tmp2,$tmp,$src2\n\t"
4552             "pshufd  $tmp,$tmp2,0xE\n\t"
4553             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4554             "movdq   $tmp,$src1\n\t"
4555             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4556             "movdq   $dst,$tmp2\t! add reduction4L" %}
4557   ins_encode %{
4558     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
4559     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
4560     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4561     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4562     __ movdq($tmp$$XMMRegister, $src1$$Register);
4563     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4564     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4565   %}
4566   ins_pipe( pipe_slow );
4567 %}
4568 
4569 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
4570   predicate(UseAVX > 2);
4571   match(Set dst (AddReductionVL src1 src2));
4572   effect(TEMP tmp, TEMP tmp2);
4573   format %{ "vextracti64x4  $tmp2,$src2\n\t"
4574             "vpaddq  $tmp2,$tmp2,$src2\n\t"
4575             "vextracti128   $tmp,$tmp2\n\t"
4576             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4577             "pshufd  $tmp,$tmp2,0xE\n\t"
4578             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4579             "movdq   $tmp,$src1\n\t"
4580             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4581             "movdq   $dst,$tmp2\t! add reduction8L" %}
4582   ins_encode %{
4583     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
4584     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
4585     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
4586     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4587     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4588     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4589     __ movdq($tmp$$XMMRegister, $src1$$Register);
4590     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4591     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4592   %}
4593   ins_pipe( pipe_slow );
4594 %}
4595 #endif // _LP64
4596 
4597 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4598   predicate(UseSSE >= 1 && UseAVX == 0);
4599   match(Set dst (AddReductionVF src1 src2));
4600   effect(TEMP tmp, TEMP tmp2);
4601   format %{ "movdqu  $tmp,$src1\n\t"
4602             "addss   $tmp,$src2\n\t"
4603             "pshufd  $tmp2,$src2,0x01\n\t"
4604             "addss   $tmp,$tmp2\n\t"
4605             "movdqu  $dst,$tmp\t! add reduction2F" %}
4606   ins_encode %{
4607     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4608     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
4609     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4610     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4611     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4612   %}
4613   ins_pipe( pipe_slow );
4614 %}
4615 
4616 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4617   predicate(UseAVX > 0);
4618   match(Set dst (AddReductionVF src1 src2));
4619   effect(TEMP tmp2, TEMP tmp);
4620   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4621             "pshufd  $tmp,$src2,0x01\n\t"
4622             "vaddss  $dst,$tmp2,$tmp\t! add reduction2F" %}
4623   ins_encode %{
4624     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4625     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4626     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4627   %}
4628   ins_pipe( pipe_slow );
4629 %}
4630 
4631 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4632   predicate(UseSSE >= 1 && UseAVX == 0);
4633   match(Set dst (AddReductionVF src1 src2));
4634   effect(TEMP tmp, TEMP tmp2);
4635   format %{ "movdqu  $tmp,$src1\n\t"
4636             "addss   $tmp,$src2\n\t"
4637             "pshufd  $tmp2,$src2,0x01\n\t"
4638             "addss   $tmp,$tmp2\n\t"
4639             "pshufd  $tmp2,$src2,0x02\n\t"
4640             "addss   $tmp,$tmp2\n\t"
4641             "pshufd  $tmp2,$src2,0x03\n\t"
4642             "addss   $tmp,$tmp2\n\t"
4643             "movdqu  $dst,$tmp\t! add reduction4F" %}
4644   ins_encode %{
4645     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4646     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
4647     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4648     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4649     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
4650     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4651     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
4652     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4653     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4654   %}
4655   ins_pipe( pipe_slow );
4656 %}
4657 
4658 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4659   predicate(UseAVX > 0);
4660   match(Set dst (AddReductionVF src1 src2));
4661   effect(TEMP tmp, TEMP tmp2);
4662   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4663             "pshufd  $tmp,$src2,0x01\n\t"
4664             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4665             "pshufd  $tmp,$src2,0x02\n\t"
4666             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4667             "pshufd  $tmp,$src2,0x03\n\t"
4668             "vaddss  $dst,$tmp2,$tmp\t! add reduction4F" %}
4669   ins_encode %{
4670     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4671     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4672     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4673     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4674     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4675     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4676     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4677   %}
4678   ins_pipe( pipe_slow );
4679 %}
4680 
4681 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
4682   predicate(UseAVX > 0);
4683   match(Set dst (AddReductionVF src1 src2));
4684   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4685   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4686             "pshufd  $tmp,$src2,0x01\n\t"
4687             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4688             "pshufd  $tmp,$src2,0x02\n\t"
4689             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4690             "pshufd  $tmp,$src2,0x03\n\t"
4691             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4692             "vextractf128  $tmp3,$src2\n\t"
4693             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4694             "pshufd  $tmp,$tmp3,0x01\n\t"
4695             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4696             "pshufd  $tmp,$tmp3,0x02\n\t"
4697             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4698             "pshufd  $tmp,$tmp3,0x03\n\t"
4699             "vaddss  $dst,$tmp2,$tmp\t! add reduction8F" %}
4700   ins_encode %{
4701     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4702     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4703     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4704     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4705     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4706     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4707     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4708     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4709     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4710     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4711     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4712     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4713     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4714     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4715     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4716   %}
4717   ins_pipe( pipe_slow );
4718 %}
4719 
4720 instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4721   predicate(UseAVX > 2);
4722   match(Set dst (AddReductionVF src1 src2));
4723   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4724   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4725             "pshufd  $tmp,$src2,0x01\n\t"
4726             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4727             "pshufd  $tmp,$src2,0x02\n\t"
4728             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4729             "pshufd  $tmp,$src2,0x03\n\t"
4730             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4731             "vextractf32x4  $tmp3,$src2, 0x1\n\t"
4732             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4733             "pshufd  $tmp,$tmp3,0x01\n\t"
4734             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4735             "pshufd  $tmp,$tmp3,0x02\n\t"
4736             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4737             "pshufd  $tmp,$tmp3,0x03\n\t"
4738             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4739             "vextractf32x4  $tmp3,$src2, 0x2\n\t"
4740             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4741             "pshufd  $tmp,$tmp3,0x01\n\t"
4742             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4743             "pshufd  $tmp,$tmp3,0x02\n\t"
4744             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4745             "pshufd  $tmp,$tmp3,0x03\n\t"
4746             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4747             "vextractf32x4  $tmp3,$src2, 0x3\n\t"
4748             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4749             "pshufd  $tmp,$tmp3,0x01\n\t"
4750             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4751             "pshufd  $tmp,$tmp3,0x02\n\t"
4752             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4753             "pshufd  $tmp,$tmp3,0x03\n\t"
4754             "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
4755   ins_encode %{
4756     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4757     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4758     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4759     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4760     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4761     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4762     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4763     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4764     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4765     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4766     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4767     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4768     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4769     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4770     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4771     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4772     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4773     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4774     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4775     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4776     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4777     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4778     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4779     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4780     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4781     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4782     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4783     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4784     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4785     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4786     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4787   %}
4788   ins_pipe( pipe_slow );
4789 %}
4790 
4791 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
4792   predicate(UseSSE >= 1 && UseAVX == 0);
4793   match(Set dst (AddReductionVD src1 src2));
4794   effect(TEMP tmp, TEMP dst);
4795   format %{ "movdqu  $tmp,$src1\n\t"
4796             "addsd   $tmp,$src2\n\t"
4797             "pshufd  $dst,$src2,0xE\n\t"
4798             "addsd   $dst,$tmp\t! add reduction2D" %}
4799   ins_encode %{
4800     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4801     __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
4802     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
4803     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
4804   %}
4805   ins_pipe( pipe_slow );
4806 %}
4807 
4808 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
4809   predicate(UseAVX > 0);
4810   match(Set dst (AddReductionVD src1 src2));
4811   effect(TEMP tmp, TEMP tmp2);
4812   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4813             "pshufd  $tmp,$src2,0xE\n\t"
4814             "vaddsd  $dst,$tmp2,$tmp\t! add reduction2D" %}
4815   ins_encode %{
4816     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4817     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4818     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4819   %}
4820   ins_pipe( pipe_slow );
4821 %}
4822 
4823 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
4824   predicate(UseAVX > 0);
4825   match(Set dst (AddReductionVD src1 src2));
4826   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4827   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4828             "pshufd  $tmp,$src2,0xE\n\t"
4829             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4830             "vextractf128  $tmp3,$src2\n\t"
4831             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4832             "pshufd  $tmp,$tmp3,0xE\n\t"
4833             "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
4834   ins_encode %{
4835     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4836     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4837     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4838     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4839     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4840     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4841     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4842   %}
4843   ins_pipe( pipe_slow );
4844 %}
4845 
4846 instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
4847   predicate(UseAVX > 2);
4848   match(Set dst (AddReductionVD src1 src2));
4849   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4850   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4851             "pshufd  $tmp,$src2,0xE\n\t"
4852             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4853             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
4854             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4855             "pshufd  $tmp,$tmp3,0xE\n\t"
4856             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4857             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
4858             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4859             "pshufd  $tmp,$tmp3,0xE\n\t"
4860             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4861             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
4862             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4863             "pshufd  $tmp,$tmp3,0xE\n\t"
4864             "vaddsd  $dst,$tmp2,$tmp\t! add reduction8D" %}
4865   ins_encode %{
4866     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4867     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4868     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4869     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4870     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4871     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4872     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4873     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4874     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4875     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4876     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4877     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4878     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4879     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4880     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4881   %}
4882   ins_pipe( pipe_slow );
4883 %}
4884 
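// Multiply reductions for ints: the lanes of src2 are folded together
// pairwise with pshufd and (v)pmulld, then the scalar accumulator src1 is
// multiplied in and the result is moved back to a general-purpose register.
// The SSE forms require SSE4.1 for pmulld.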
4885 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4886   predicate(UseSSE > 3 && UseAVX == 0);
4887   match(Set dst (MulReductionVI src1 src2));
4888   effect(TEMP tmp, TEMP tmp2);
4889   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
4890             "pmulld  $tmp2,$src2\n\t"
4891             "movd    $tmp,$src1\n\t"
4892             "pmulld  $tmp2,$tmp\n\t"
4893             "movd    $dst,$tmp2\t! mul reduction2I" %}
4894   ins_encode %{
4895     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4896     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
4897     __ movdl($tmp$$XMMRegister, $src1$$Register);
4898     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4899     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4900   %}
4901   ins_pipe( pipe_slow );
4902 %}
4903 
4904 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4905   predicate(UseAVX > 0);
4906   match(Set dst (MulReductionVI src1 src2));
4907   effect(TEMP tmp, TEMP tmp2);
4908   format %{ "pshufd   $tmp2,$src2,0x1\n\t"
4909             "vpmulld  $tmp,$src2,$tmp2\n\t"
4910             "movd     $tmp2,$src1\n\t"
4911             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4912             "movd     $dst,$tmp2\t! mul reduction2I" %}
4913   ins_encode %{
4914     int vector_len = 0;
4915     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4916     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4917     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4918     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4919     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4920   %}
4921   ins_pipe( pipe_slow );
4922 %}
4923 
4924 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4925   predicate(UseSSE > 3 && UseAVX == 0);
4926   match(Set dst (MulReductionVI src1 src2));
4927   effect(TEMP tmp, TEMP tmp2);
4928   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4929             "pmulld  $tmp2,$src2\n\t"
4930             "pshufd  $tmp,$tmp2,0x1\n\t"
4931             "pmulld  $tmp2,$tmp\n\t"
4932             "movd    $tmp,$src1\n\t"
4933             "pmulld  $tmp2,$tmp\n\t"
4934             "movd    $dst,$tmp2\t! mul reduction4I" %}
4935   ins_encode %{
4936     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4937     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
4938     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
4939     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4940     __ movdl($tmp$$XMMRegister, $src1$$Register);
4941     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4942     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4943   %}
4944   ins_pipe( pipe_slow );
4945 %}
4946 
4947 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4948   predicate(UseAVX > 0);
4949   match(Set dst (MulReductionVI src1 src2));
4950   effect(TEMP tmp, TEMP tmp2);
4951   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
4952             "vpmulld  $tmp,$src2,$tmp2\n\t"
4953             "pshufd   $tmp2,$tmp,0x1\n\t"
4954             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4955             "movd     $tmp2,$src1\n\t"
4956             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4957             "movd     $dst,$tmp2\t! mul reduction4I" %}
4958   ins_encode %{
4959     int vector_len = 0;
4960     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4961     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4962     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4963     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4964     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4965     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4966     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4967   %}
4968   ins_pipe( pipe_slow );
4969 %}
4970 
4971 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4972   predicate(UseAVX > 0);
4973   match(Set dst (MulReductionVI src1 src2));
4974   effect(TEMP tmp, TEMP tmp2);
4975   format %{ "vextracti128  $tmp,$src2\n\t"
4976             "vpmulld  $tmp,$tmp,$src2\n\t"
4977             "pshufd   $tmp2,$tmp,0xE\n\t"
4978             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4979             "pshufd   $tmp2,$tmp,0x1\n\t"
4980             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4981             "movd     $tmp2,$src1\n\t"
4982             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4983             "movd     $dst,$tmp2\t! mul reduction8I" %}
4984   ins_encode %{
4985     int vector_len = 0;
4986     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
4987     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4988     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4989     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4990     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4991     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4992     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4993     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4994     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4995   %}
4996   ins_pipe( pipe_slow );
4997 %}
4998 
4999 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
5000   predicate(UseAVX > 2);
5001   match(Set dst (MulReductionVI src1 src2));
5002   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5003   format %{ "vextracti64x4  $tmp3,$src2\n\t"
5004             "vpmulld  $tmp3,$tmp3,$src2\n\t"
5005             "vextracti128   $tmp,$tmp3\n\t"
5006             "vpmulld  $tmp,$tmp,$tmp3\n\t"
5007             "pshufd   $tmp2,$tmp,0xE\n\t"
5008             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5009             "pshufd   $tmp2,$tmp,0x1\n\t"
5010             "vpmulld  $tmp,$tmp,$tmp2\n\t"
5011             "movd     $tmp2,$src1\n\t"
5012             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
5013             "movd     $dst,$tmp2\t! mul reduction16I" %}
5014   ins_encode %{
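    // Fold 512 bits down to 256 (multiply the upper half of src2 into the
    // lower half), then 256 down to 128, then finish with the same
    // pshufd/vpmulld sequence as the 128-bit case and multiply in src1.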
5015     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
5016     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
5017     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
5018     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
5019     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
5020     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5021     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
5022     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5023     __ movdl($tmp2$$XMMRegister, $src1$$Register);
5024     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5025     __ movdl($dst$$Register, $tmp2$$XMMRegister);
5026   %}
5027   ins_pipe( pipe_slow );
5028 %}
5029 
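// Multiply reductions for longs rely on vpmullq, which requires AVX-512DQ
// (hence the supports_avx512dq() predicates), and movdq for the 64-bit
// GPR transfers, so these rules are 64-bit (_LP64) only.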
5030 #ifdef _LP64
5031 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
5032   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
5033   match(Set dst (MulReductionVL src1 src2));
5034   effect(TEMP tmp, TEMP tmp2);
5035   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
5036             "vpmullq  $tmp,$src2,$tmp2\n\t"
5037             "movdq    $tmp2,$src1\n\t"
5038             "vpmullq  $tmp2,$tmp,$tmp2\n\t"
5039             "movdq    $dst,$tmp2\t! mul reduction2L" %}
5040   ins_encode %{
5041     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
5042     __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
5043     __ movdq($tmp2$$XMMRegister, $src1$$Register);
5044     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
5045     __ movdq($dst$$Register, $tmp2$$XMMRegister);
5046   %}
5047   ins_pipe( pipe_slow );
5048 %}
5049 
5050 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
5051   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
5052   match(Set dst (MulReductionVL src1 src2));
5053   effect(TEMP tmp, TEMP tmp2);
5054   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
5055             "vpmullq  $tmp2,$tmp,$src2\n\t"
5056             "pshufd   $tmp,$tmp2,0xE\n\t"
5057             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5058             "movdq    $tmp,$src1\n\t"
5059             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5060             "movdq    $dst,$tmp2\t! mul reduction4L" %}
5061   ins_encode %{
5062     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
5063     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
5064     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5065     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5066     __ movdq($tmp$$XMMRegister, $src1$$Register);
5067     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5068     __ movdq($dst$$Register, $tmp2$$XMMRegister);
5069   %}
5070   ins_pipe( pipe_slow );
5071 %}
5072 
5073 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
5074   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
5075   match(Set dst (MulReductionVL src1 src2));
5076   effect(TEMP tmp, TEMP tmp2);
5077   format %{ "vextracti64x4  $tmp2,$src2\n\t"
5078             "vpmullq  $tmp2,$tmp2,$src2\n\t"
5079             "vextracti128   $tmp,$tmp2\n\t"
5080             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5081             "pshufd   $tmp,$tmp2,0xE\n\t"
5082             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5083             "movdq    $tmp,$src1\n\t"
5084             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
5085             "movdq    $dst,$tmp2\t! mul reduction8L" %}
5086   ins_encode %{
5087     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
5088     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
5089     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
5090     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5091     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
5092     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5093     __ movdq($tmp$$XMMRegister, $src1$$Register);
5094     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
5095     __ movdq($dst$$Register, $tmp2$$XMMRegister);
5096   %}
5097   ins_pipe( pipe_slow );
5098 %}
5099 #endif
5100 
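// Multiply reductions for floats: each lane of src2 is brought into element 0
// with pshufd (and vextractf128/vextractf32x4 for the upper lanes of wider
// vectors) and folded into the scalar product with mulss/vmulss, starting
// from src1.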
5101 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
5102   predicate(UseSSE >= 1 && UseAVX == 0);
5103   match(Set dst (MulReductionVF src1 src2));
5104   effect(TEMP tmp, TEMP tmp2);
5105   format %{ "movdqu  $tmp,$src1\n\t"
5106             "mulss   $tmp,$src2\n\t"
5107             "pshufd  $tmp2,$src2,0x01\n\t"
5108             "mulss   $tmp,$tmp2\n\t"
5109             "movdqu  $dst,$tmp\t! mul reduction2F" %}
5110   ins_encode %{
5111     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
5112     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
5113     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
5114     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
5115     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
5116   %}
5117   ins_pipe( pipe_slow );
5118 %}
5119 
5120 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
5121   predicate(UseAVX > 0);
5122   match(Set dst (MulReductionVF src1 src2));
5123   effect(TEMP tmp, TEMP tmp2);
5124   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
5125             "pshufd  $tmp,$src2,0x01\n\t"
5126             "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
5127   ins_encode %{
5128     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5129     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5130     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5131   %}
5132   ins_pipe( pipe_slow );
5133 %}
5134 
5135 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
5136   predicate(UseSSE >= 1 && UseAVX == 0);
5137   match(Set dst (MulReductionVF src1 src2));
5138   effect(TEMP tmp, TEMP tmp2);
5139   format %{ "movdqu  $tmp,$src1\n\t"
5140             "mulss   $tmp,$src2\n\t"
5141             "pshufd  $tmp2,$src2,0x01\n\t"
5142             "mulss   $tmp,$tmp2\n\t"
5143             "pshufd  $tmp2,$src2,0x02\n\t"
5144             "mulss   $tmp,$tmp2\n\t"
5145             "pshufd  $tmp2,$src2,0x03\n\t"
5146             "mulss   $tmp,$tmp2\n\t"
5147             "movdqu  $dst,$tmp\t! mul reduction4F" %}
5148   ins_encode %{
5149     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
5150     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
5151     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
5152     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
5153     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
5154     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
5155     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
5156     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
5157     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
5158   %}
5159   ins_pipe( pipe_slow );
5160 %}
5161 
5162 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
5163   predicate(UseAVX > 0);
5164   match(Set dst (MulReductionVF src1 src2));
5165   effect(TEMP tmp, TEMP tmp2);
5166   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
5167             "pshufd  $tmp,$src2,0x01\n\t"
5168             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5169             "pshufd  $tmp,$src2,0x02\n\t"
5170             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5171             "pshufd  $tmp,$src2,0x03\n\t"
5172             "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
5173   ins_encode %{
5174     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5175     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5176     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5177     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5178     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5179     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5180     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5181   %}
5182   ins_pipe( pipe_slow );
5183 %}
5184 
5185 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
5186   predicate(UseAVX > 0);
5187   match(Set dst (MulReductionVF src1 src2));
5188   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5189   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
5190             "pshufd  $tmp,$src2,0x01\n\t"
5191             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5192             "pshufd  $tmp,$src2,0x02\n\t"
5193             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5194             "pshufd  $tmp,$src2,0x03\n\t"
5195             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5196             "vextractf128  $tmp3,$src2\n\t"
5197             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
5198             "pshufd  $tmp,$tmp3,0x01\n\t"
5199             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5200             "pshufd  $tmp,$tmp3,0x02\n\t"
5201             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5202             "pshufd  $tmp,$tmp3,0x03\n\t"
5203             "vmulss  $dst,$tmp2,$tmp\t! mul reduction8F" %}
5204   ins_encode %{
5205     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5206     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5207     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5208     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5209     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5210     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5211     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5212     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
5213     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5214     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
5215     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5216     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
5217     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5218     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
5219     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5220   %}
5221   ins_pipe( pipe_slow );
5222 %}
5223 
5224 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
5225   predicate(UseAVX > 2);
5226   match(Set dst (MulReductionVF src1 src2));
5227   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5228   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
5229             "pshufd  $tmp,$src2,0x01\n\t"
5230             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5231             "pshufd  $tmp,$src2,0x02\n\t"
5232             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5233             "pshufd  $tmp,$src2,0x03\n\t"
5234             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5235             "vextractf32x4  $tmp3,$src2, 0x1\n\t"
5236             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
5237             "pshufd  $tmp,$tmp3,0x01\n\t"
5238             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5239             "pshufd  $tmp,$tmp3,0x02\n\t"
5240             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5241             "pshufd  $tmp,$tmp3,0x03\n\t"
5242             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5243             "vextractf32x4  $tmp3,$src2, 0x2\n\t"
5244             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
5245             "pshufd  $tmp,$tmp3,0x01\n\t"
5246             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5247             "pshufd  $tmp,$tmp3,0x02\n\t"
5248             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5249             "pshufd  $tmp,$tmp3,0x03\n\t"
5250             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5251             "vextractf32x4  $tmp3,$src2, 0x3\n\t"
5252             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
5253             "pshufd  $tmp,$tmp3,0x01\n\t"
5254             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5255             "pshufd  $tmp,$tmp3,0x02\n\t"
5256             "vmulss  $tmp2,$tmp2,$tmp\n\t"
5257             "pshufd  $tmp,$tmp3,0x03\n\t"
5258             "vmulss  $dst,$tmp2,$tmp\t! mul reduction16F" %}
5259   ins_encode %{
5260     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5261     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
5262     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5263     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
5264     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5265     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
5266     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5267     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
5268     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5269     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
5270     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5271     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
5272     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5273     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
5274     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5275     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
5276     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5277     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
5278     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5279     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
5280     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5281     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
5282     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5283     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
5284     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5285     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
5286     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5287     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
5288     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5289     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
5290     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5291   %}
5292   ins_pipe( pipe_slow );
5293 %}
5294 
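// Multiply reductions for doubles follow the same scheme, using pshufd 0xE
// for the upper double of each 128-bit lane and mulsd/vmulsd to accumulate.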
5295 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
5296   predicate(UseSSE >= 2 && UseAVX == 0);
5297   match(Set dst (MulReductionVD src1 src2));
5298   effect(TEMP tmp, TEMP dst);
5299   format %{ "movdqu  $tmp,$src1\n\t"
5300             "mulsd   $tmp,$src2\n\t"
5301             "pshufd  $dst,$src2,0xE\n\t"
5302             "mulsd   $dst,$tmp\t! mul reduction2D" %}
5303   ins_encode %{
5304     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
5305     __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
5306     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
5307     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
5308   %}
5309   ins_pipe( pipe_slow );
5310 %}
5311 
5312 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
5313   predicate(UseAVX > 0);
5314   match(Set dst (MulReductionVD src1 src2));
5315   effect(TEMP tmp, TEMP tmp2);
5316   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
5317             "pshufd  $tmp,$src2,0xE\n\t"
5318             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
5319   ins_encode %{
5320     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5321     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5322     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5323   %}
5324   ins_pipe( pipe_slow );
5325 %}
5326 
5327 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
5328   predicate(UseAVX > 0);
5329   match(Set dst (MulReductionVD src1 src2));
5330   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5331   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
5332             "pshufd  $tmp,$src2,0xE\n\t"
5333             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
5334             "vextractf128  $tmp3,$src2\n\t"
5335             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
5336             "pshufd  $tmp,$tmp3,0xE\n\t"
5337             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
5338   ins_encode %{
5339     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5340     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5341     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5342     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
5343     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5344     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
5345     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5346   %}
5347   ins_pipe( pipe_slow );
5348 %}
5349 
5350 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
5351   predicate(UseAVX > 2);
5352   match(Set dst (MulReductionVD src1 src2));
5353   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
5354   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
5355             "pshufd  $tmp,$src2,0xE\n\t"
5356             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
5357             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
5358             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
5359             "pshufd  $tmp,$tmp3,0xE\n\t"
5360             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
5361             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
5362             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
5363             "pshufd  $tmp,$tmp3,0xE\n\t"
5364             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
5365             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
5366             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
5367             "pshufd  $tmp,$tmp3,0xE\n\t"
5368             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
5369   ins_encode %{
5370     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
5371     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
5372     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5373     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
5374     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5375     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
5376     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5377     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
5378     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5379     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
5380     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5381     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
5382     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
5383     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
5384     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
5385   %}
5386   ins_pipe( pipe_slow );
5387 %}
5388 
5389 // ====================VECTOR ARITHMETIC=======================================
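// The rules below generally come in three forms per operand type and vector
// size: a two-operand SSE form (dst = dst op src), a three-operand AVX
// register form, and an AVX form taking a memory operand (LoadVector).
// The vector_len value passed to the assembler selects the encoding width:
// 0 for 128-bit, 1 for 256-bit and 2 for 512-bit vectors.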
5390 
5391 // --------------------------------- ADD --------------------------------------
5392 
5393 // Bytes vector add
5394 instruct vadd4B(vecS dst, vecS src) %{
5395   predicate(n->as_Vector()->length() == 4);
5396   match(Set dst (AddVB dst src));
5397   format %{ "paddb   $dst,$src\t! add packed4B" %}
5398   ins_encode %{
5399     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5400   %}
5401   ins_pipe( pipe_slow );
5402 %}
5403 
5404 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
5405   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5406   match(Set dst (AddVB src1 src2));
5407   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
5408   ins_encode %{
5409     int vector_len = 0;
5410     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5411   %}
5412   ins_pipe( pipe_slow );
5413 %}
5414 
5415 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
5416   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5417   match(Set dst (AddVB src (LoadVector mem)));
5418   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
5419   ins_encode %{
5420     int vector_len = 0;
5421     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5422   %}
5423   ins_pipe( pipe_slow );
5424 %}
5425 
5426 instruct vadd8B(vecD dst, vecD src) %{
5427   predicate(n->as_Vector()->length() == 8);
5428   match(Set dst (AddVB dst src));
5429   format %{ "paddb   $dst,$src\t! add packed8B" %}
5430   ins_encode %{
5431     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5432   %}
5433   ins_pipe( pipe_slow );
5434 %}
5435 
5436 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
5437   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5438   match(Set dst (AddVB src1 src2));
5439   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
5440   ins_encode %{
5441     int vector_len = 0;
5442     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5443   %}
5444   ins_pipe( pipe_slow );
5445 %}
5446 
5447 instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
5448   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5449   match(Set dst (AddVB src (LoadVector mem)));
5450   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
5451   ins_encode %{
5452     int vector_len = 0;
5453     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5454   %}
5455   ins_pipe( pipe_slow );
5456 %}
5457 
5458 instruct vadd16B(vecX dst, vecX src) %{
5459   predicate(n->as_Vector()->length() == 16);
5460   match(Set dst (AddVB dst src));
5461   format %{ "paddb   $dst,$src\t! add packed16B" %}
5462   ins_encode %{
5463     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5464   %}
5465   ins_pipe( pipe_slow );
5466 %}
5467 
5468 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
5469   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5470   match(Set dst (AddVB src1 src2));
5471   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
5472   ins_encode %{
5473     int vector_len = 0;
5474     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5475   %}
5476   ins_pipe( pipe_slow );
5477 %}
5478 
5479 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
5480   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5481   match(Set dst (AddVB src (LoadVector mem)));
5482   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
5483   ins_encode %{
5484     int vector_len = 0;
5485     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5486   %}
5487   ins_pipe( pipe_slow );
5488 %}
5489 
5490 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
5491   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5492   match(Set dst (AddVB src1 src2));
5493   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
5494   ins_encode %{
5495     int vector_len = 1;
5496     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5497   %}
5498   ins_pipe( pipe_slow );
5499 %}
5500 
5501 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
5502   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5503   match(Set dst (AddVB src (LoadVector mem)));
5504   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
5505   ins_encode %{
5506     int vector_len = 1;
5507     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5508   %}
5509   ins_pipe( pipe_slow );
5510 %}
5511 
5512 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
5513   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5514   match(Set dst (AddVB src1 src2));
5515   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
5516   ins_encode %{
5517     int vector_len = 2;
5518     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5519   %}
5520   ins_pipe( pipe_slow );
5521 %}
5522 
5523 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
5524   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5525   match(Set dst (AddVB src (LoadVector mem)));
5526   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
5527   ins_encode %{
5528     int vector_len = 2;
5529     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5530   %}
5531   ins_pipe( pipe_slow );
5532 %}
5533 
5534 // Shorts/Chars vector add
5535 instruct vadd2S(vecS dst, vecS src) %{
5536   predicate(n->as_Vector()->length() == 2);
5537   match(Set dst (AddVS dst src));
5538   format %{ "paddw   $dst,$src\t! add packed2S" %}
5539   ins_encode %{
5540     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5541   %}
5542   ins_pipe( pipe_slow );
5543 %}
5544 
5545 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
5546   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5547   match(Set dst (AddVS src1 src2));
5548   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
5549   ins_encode %{
5550     int vector_len = 0;
5551     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5552   %}
5553   ins_pipe( pipe_slow );
5554 %}
5555 
5556 instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
5557   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5558   match(Set dst (AddVS src (LoadVector mem)));
5559   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
5560   ins_encode %{
5561     int vector_len = 0;
5562     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5563   %}
5564   ins_pipe( pipe_slow );
5565 %}
5566 
5567 instruct vadd4S(vecD dst, vecD src) %{
5568   predicate(n->as_Vector()->length() == 4);
5569   match(Set dst (AddVS dst src));
5570   format %{ "paddw   $dst,$src\t! add packed4S" %}
5571   ins_encode %{
5572     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5573   %}
5574   ins_pipe( pipe_slow );
5575 %}
5576 
5577 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
5578   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5579   match(Set dst (AddVS src1 src2));
5580   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
5581   ins_encode %{
5582     int vector_len = 0;
5583     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5584   %}
5585   ins_pipe( pipe_slow );
5586 %}
5587 
5588 instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
5589   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5590   match(Set dst (AddVS src (LoadVector mem)));
5591   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
5592   ins_encode %{
5593     int vector_len = 0;
5594     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5595   %}
5596   ins_pipe( pipe_slow );
5597 %}
5598 
5599 instruct vadd8S(vecX dst, vecX src) %{
5600   predicate(n->as_Vector()->length() == 8);
5601   match(Set dst (AddVS dst src));
5602   format %{ "paddw   $dst,$src\t! add packed8S" %}
5603   ins_encode %{
5604     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5605   %}
5606   ins_pipe( pipe_slow );
5607 %}
5608 
5609 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
5610   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5611   match(Set dst (AddVS src1 src2));
5612   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
5613   ins_encode %{
5614     int vector_len = 0;
5615     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5616   %}
5617   ins_pipe( pipe_slow );
5618 %}
5619 
5620 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
5621   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5622   match(Set dst (AddVS src (LoadVector mem)));
5623   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
5624   ins_encode %{
5625     int vector_len = 0;
5626     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5627   %}
5628   ins_pipe( pipe_slow );
5629 %}
5630 
5631 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
5632   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5633   match(Set dst (AddVS src1 src2));
5634   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
5635   ins_encode %{
5636     int vector_len = 1;
5637     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5638   %}
5639   ins_pipe( pipe_slow );
5640 %}
5641 
5642 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
5643   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5644   match(Set dst (AddVS src (LoadVector mem)));
5645   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
5646   ins_encode %{
5647     int vector_len = 1;
5648     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5649   %}
5650   ins_pipe( pipe_slow );
5651 %}
5652 
5653 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
5654   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5655   match(Set dst (AddVS src1 src2));
5656   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
5657   ins_encode %{
5658     int vector_len = 2;
5659     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5660   %}
5661   ins_pipe( pipe_slow );
5662 %}
5663 
5664 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
5665   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5666   match(Set dst (AddVS src (LoadVector mem)));
5667   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
5668   ins_encode %{
5669     int vector_len = 2;
5670     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5671   %}
5672   ins_pipe( pipe_slow );
5673 %}
5674 
5675 // Integers vector add
5676 instruct vadd2I(vecD dst, vecD src) %{
5677   predicate(n->as_Vector()->length() == 2);
5678   match(Set dst (AddVI dst src));
5679   format %{ "paddd   $dst,$src\t! add packed2I" %}
5680   ins_encode %{
5681     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5682   %}
5683   ins_pipe( pipe_slow );
5684 %}
5685 
5686 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
5687   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5688   match(Set dst (AddVI src1 src2));
5689   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
5690   ins_encode %{
5691     int vector_len = 0;
5692     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5693   %}
5694   ins_pipe( pipe_slow );
5695 %}
5696 
5697 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
5698   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5699   match(Set dst (AddVI src (LoadVector mem)));
5700   format %{ "vpaddd  $dst,$src,$mem\t! add packed2I" %}
5701   ins_encode %{
5702     int vector_len = 0;
5703     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5704   %}
5705   ins_pipe( pipe_slow );
5706 %}
5707 
5708 instruct vadd4I(vecX dst, vecX src) %{
5709   predicate(n->as_Vector()->length() == 4);
5710   match(Set dst (AddVI dst src));
5711   format %{ "paddd   $dst,$src\t! add packed4I" %}
5712   ins_encode %{
5713     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5714   %}
5715   ins_pipe( pipe_slow );
5716 %}
5717 
5718 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
5719   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5720   match(Set dst (AddVI src1 src2));
5721   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
5722   ins_encode %{
5723     int vector_len = 0;
5724     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5725   %}
5726   ins_pipe( pipe_slow );
5727 %}
5728 
5729 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
5730   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5731   match(Set dst (AddVI src (LoadVector mem)));
5732   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
5733   ins_encode %{
5734     int vector_len = 0;
5735     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5736   %}
5737   ins_pipe( pipe_slow );
5738 %}
5739 
5740 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
5741   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5742   match(Set dst (AddVI src1 src2));
5743   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
5744   ins_encode %{
5745     int vector_len = 1;
5746     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5747   %}
5748   ins_pipe( pipe_slow );
5749 %}
5750 
5751 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
5752   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5753   match(Set dst (AddVI src (LoadVector mem)));
5754   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
5755   ins_encode %{
5756     int vector_len = 1;
5757     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5758   %}
5759   ins_pipe( pipe_slow );
5760 %}
5761 
5762 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
5763   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5764   match(Set dst (AddVI src1 src2));
5765   format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
5766   ins_encode %{
5767     int vector_len = 2;
5768     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5769   %}
5770   ins_pipe( pipe_slow );
5771 %}
5772 
5773 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
5774   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5775   match(Set dst (AddVI src (LoadVector mem)));
5776   format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
5777   ins_encode %{
5778     int vector_len = 2;
5779     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5780   %}
5781   ins_pipe( pipe_slow );
5782 %}
5783 
5784 // Longs vector add
5785 instruct vadd2L(vecX dst, vecX src) %{
5786   predicate(n->as_Vector()->length() == 2);
5787   match(Set dst (AddVL dst src));
5788   format %{ "paddq   $dst,$src\t! add packed2L" %}
5789   ins_encode %{
5790     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
5791   %}
5792   ins_pipe( pipe_slow );
5793 %}
5794 
5795 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
5796   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5797   match(Set dst (AddVL src1 src2));
5798   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
5799   ins_encode %{
5800     int vector_len = 0;
5801     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5802   %}
5803   ins_pipe( pipe_slow );
5804 %}
5805 
5806 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
5807   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5808   match(Set dst (AddVL src (LoadVector mem)));
5809   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
5810   ins_encode %{
5811     int vector_len = 0;
5812     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5813   %}
5814   ins_pipe( pipe_slow );
5815 %}
5816 
5817 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
5818   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5819   match(Set dst (AddVL src1 src2));
5820   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
5821   ins_encode %{
5822     int vector_len = 1;
5823     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5824   %}
5825   ins_pipe( pipe_slow );
5826 %}
5827 
5828 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
5829   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5830   match(Set dst (AddVL src (LoadVector mem)));
5831   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
5832   ins_encode %{
5833     int vector_len = 1;
5834     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5835   %}
5836   ins_pipe( pipe_slow );
5837 %}
5838 
5839 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
5840   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5841   match(Set dst (AddVL src1 src2));
5842   format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
5843   ins_encode %{
5844     int vector_len = 2;
5845     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5846   %}
5847   ins_pipe( pipe_slow );
5848 %}
5849 
5850 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
5851   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5852   match(Set dst (AddVL src (LoadVector mem)));
5853   format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
5854   ins_encode %{
5855     int vector_len = 2;
5856     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5857   %}
5858   ins_pipe( pipe_slow );
5859 %}
5860 
5861 // Floats vector add
5862 instruct vadd2F(vecD dst, vecD src) %{
5863   predicate(n->as_Vector()->length() == 2);
5864   match(Set dst (AddVF dst src));
5865   format %{ "addps   $dst,$src\t! add packed2F" %}
5866   ins_encode %{
5867     __ addps($dst$$XMMRegister, $src$$XMMRegister);
5868   %}
5869   ins_pipe( pipe_slow );
5870 %}
5871 
5872 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
5873   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5874   match(Set dst (AddVF src1 src2));
5875   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
5876   ins_encode %{
5877     int vector_len = 0;
5878     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5879   %}
5880   ins_pipe( pipe_slow );
5881 %}
5882 
5883 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
5884   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5885   match(Set dst (AddVF src (LoadVector mem)));
5886   format %{ "vaddps  $dst,$src,$mem\t! add packed2F" %}
5887   ins_encode %{
5888     int vector_len = 0;
5889     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5890   %}
5891   ins_pipe( pipe_slow );
5892 %}
5893 
5894 instruct vadd4F(vecX dst, vecX src) %{
5895   predicate(n->as_Vector()->length() == 4);
5896   match(Set dst (AddVF dst src));
5897   format %{ "addps   $dst,$src\t! add packed4F" %}
5898   ins_encode %{
5899     __ addps($dst$$XMMRegister, $src$$XMMRegister);
5900   %}
5901   ins_pipe( pipe_slow );
5902 %}
5903 
5904 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
5905   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5906   match(Set dst (AddVF src1 src2));
5907   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
5908   ins_encode %{
5909     int vector_len = 0;
5910     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5911   %}
5912   ins_pipe( pipe_slow );
5913 %}
5914 
5915 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
5916   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5917   match(Set dst (AddVF src (LoadVector mem)));
5918   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
5919   ins_encode %{
5920     int vector_len = 0;
5921     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5922   %}
5923   ins_pipe( pipe_slow );
5924 %}
5925 
5926 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
5927   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5928   match(Set dst (AddVF src1 src2));
5929   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
5930   ins_encode %{
5931     int vector_len = 1;
5932     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5933   %}
5934   ins_pipe( pipe_slow );
5935 %}
5936 
5937 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
5938   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5939   match(Set dst (AddVF src (LoadVector mem)));
5940   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
5941   ins_encode %{
5942     int vector_len = 1;
5943     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5944   %}
5945   ins_pipe( pipe_slow );
5946 %}
5947 
5948 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
5949   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5950   match(Set dst (AddVF src1 src2));
5951   format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
5952   ins_encode %{
5953     int vector_len = 2;
5954     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5955   %}
5956   ins_pipe( pipe_slow );
5957 %}
5958 
5959 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
5960   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5961   match(Set dst (AddVF src (LoadVector mem)));
5962   format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
5963   ins_encode %{
5964     int vector_len = 2;
5965     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5966   %}
5967   ins_pipe( pipe_slow );
5968 %}
5969 
5970 // Doubles vector add
5971 instruct vadd2D(vecX dst, vecX src) %{
5972   predicate(n->as_Vector()->length() == 2);
5973   match(Set dst (AddVD dst src));
5974   format %{ "addpd   $dst,$src\t! add packed2D" %}
5975   ins_encode %{
5976     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
5977   %}
5978   ins_pipe( pipe_slow );
5979 %}
5980 
5981 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
5982   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5983   match(Set dst (AddVD src1 src2));
5984   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
5985   ins_encode %{
5986     int vector_len = 0;
5987     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5988   %}
5989   ins_pipe( pipe_slow );
5990 %}
5991 
5992 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
5993   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5994   match(Set dst (AddVD src (LoadVector mem)));
5995   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
5996   ins_encode %{
5997     int vector_len = 0;
5998     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5999   %}
6000   ins_pipe( pipe_slow );
6001 %}
6002 
6003 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
6004   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6005   match(Set dst (AddVD src1 src2));
6006   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
6007   ins_encode %{
6008     int vector_len = 1;
6009     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6010   %}
6011   ins_pipe( pipe_slow );
6012 %}
6013 
6014 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
6015   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6016   match(Set dst (AddVD src (LoadVector mem)));
6017   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
6018   ins_encode %{
6019     int vector_len = 1;
6020     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6021   %}
6022   ins_pipe( pipe_slow );
6023 %}
6024 
6025 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6026   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6027   match(Set dst (AddVD src1 src2));
6028   format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
6029   ins_encode %{
6030     int vector_len = 2;
6031     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6032   %}
6033   ins_pipe( pipe_slow );
6034 %}
6035 
6036 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
6037   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6038   match(Set dst (AddVD src (LoadVector mem)));
6039   format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
6040   ins_encode %{
6041     int vector_len = 2;
6042     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6043   %}
6044   ins_pipe( pipe_slow );
6045 %}
6046 
6047 // --------------------------------- SUB --------------------------------------
6048 
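// The subtract rules mirror the structure of the add rules above, using the
// packed subtract instructions (psubb, psubw, ...) and their AVX forms.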
6049 // Bytes vector sub
6050 instruct vsub4B(vecS dst, vecS src) %{
6051   predicate(n->as_Vector()->length() == 4);
6052   match(Set dst (SubVB dst src));
6053   format %{ "psubb   $dst,$src\t! sub packed4B" %}
6054   ins_encode %{
6055     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
6056   %}
6057   ins_pipe( pipe_slow );
6058 %}
6059 
6060 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
6061   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6062   match(Set dst (SubVB src1 src2));
6063   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
6064   ins_encode %{
6065     int vector_len = 0;
6066     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6067   %}
6068   ins_pipe( pipe_slow );
6069 %}
6070 
6071 instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
6072   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6073   match(Set dst (SubVB src (LoadVector mem)));
6074   format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
6075   ins_encode %{
6076     int vector_len = 0;
6077     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6078   %}
6079   ins_pipe( pipe_slow );
6080 %}
6081 
6082 instruct vsub8B(vecD dst, vecD src) %{
6083   predicate(n->as_Vector()->length() == 8);
6084   match(Set dst (SubVB dst src));
6085   format %{ "psubb   $dst,$src\t! sub packed8B" %}
6086   ins_encode %{
6087     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
6088   %}
6089   ins_pipe( pipe_slow );
6090 %}
6091 
6092 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
6093   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6094   match(Set dst (SubVB src1 src2));
6095   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
6096   ins_encode %{
6097     int vector_len = 0;
6098     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6099   %}
6100   ins_pipe( pipe_slow );
6101 %}
6102 
6103 instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
6104   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6105   match(Set dst (SubVB src (LoadVector mem)));
6106   format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
6107   ins_encode %{
6108     int vector_len = 0;
6109     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6110   %}
6111   ins_pipe( pipe_slow );
6112 %}
6113 
6114 instruct vsub16B(vecX dst, vecX src) %{
6115   predicate(n->as_Vector()->length() == 16);
6116   match(Set dst (SubVB dst src));
6117   format %{ "psubb   $dst,$src\t! sub packed16B" %}
6118   ins_encode %{
6119     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
6120   %}
6121   ins_pipe( pipe_slow );
6122 %}
6123 
6124 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
6125   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
6126   match(Set dst (SubVB src1 src2));
6127   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
6128   ins_encode %{
6129     int vector_len = 0;
6130     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6131   %}
6132   ins_pipe( pipe_slow );
6133 %}
6134 
6135 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
6136   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
6137   match(Set dst (SubVB src (LoadVector mem)));
6138   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
6139   ins_encode %{
6140     int vector_len = 0;
6141     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6142   %}
6143   ins_pipe( pipe_slow );
6144 %}
6145 
6146 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
6147   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
6148   match(Set dst (SubVB src1 src2));
6149   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
6150   ins_encode %{
6151     int vector_len = 1;
6152     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6153   %}
6154   ins_pipe( pipe_slow );
6155 %}
6156 
6157 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
6158   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
6159   match(Set dst (SubVB src (LoadVector mem)));
6160   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
6161   ins_encode %{
6162     int vector_len = 1;
6163     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6164   %}
6165   ins_pipe( pipe_slow );
6166 %}
6167 
6168 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
6169   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
6170   match(Set dst (SubVB src1 src2));
6171   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
6172   ins_encode %{
6173     int vector_len = 2;
6174     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6175   %}
6176   ins_pipe( pipe_slow );
6177 %}
6178 
6179 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
6180   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
6181   match(Set dst (SubVB src (LoadVector mem)));
6182   format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
6183   ins_encode %{
6184     int vector_len = 2;
6185     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6186   %}
6187   ins_pipe( pipe_slow );
6188 %}
6189 
6190 // Shorts/Chars vector sub
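     // As with the byte forms above, each element size gets an SSE rule that
     // updates $dst in place (two-operand encoding), an AVX three-operand
     // register rule, and an AVX rule with a memory operand.  The vector_len
     // argument selects the encoded width: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.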
6191 instruct vsub2S(vecS dst, vecS src) %{
6192   predicate(n->as_Vector()->length() == 2);
6193   match(Set dst (SubVS dst src));
6194   format %{ "psubw   $dst,$src\t! sub packed2S" %}
6195   ins_encode %{
6196     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
6197   %}
6198   ins_pipe( pipe_slow );
6199 %}
6200 
6201 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
6202   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6203   match(Set dst (SubVS src1 src2));
6204   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
6205   ins_encode %{
6206     int vector_len = 0;
6207     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6208   %}
6209   ins_pipe( pipe_slow );
6210 %}
6211 
6212 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
6213   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6214   match(Set dst (SubVS src (LoadVector mem)));
6215   format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
6216   ins_encode %{
6217     int vector_len = 0;
6218     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6219   %}
6220   ins_pipe( pipe_slow );
6221 %}
6222 
6223 instruct vsub4S(vecD dst, vecD src) %{
6224   predicate(n->as_Vector()->length() == 4);
6225   match(Set dst (SubVS dst src));
6226   format %{ "psubw   $dst,$src\t! sub packed4S" %}
6227   ins_encode %{
6228     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
6229   %}
6230   ins_pipe( pipe_slow );
6231 %}
6232 
6233 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
6234   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6235   match(Set dst (SubVS src1 src2));
6236   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
6237   ins_encode %{
6238     int vector_len = 0;
6239     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6240   %}
6241   ins_pipe( pipe_slow );
6242 %}
6243 
6244 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
6245   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6246   match(Set dst (SubVS src (LoadVector mem)));
6247   format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
6248   ins_encode %{
6249     int vector_len = 0;
6250     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6251   %}
6252   ins_pipe( pipe_slow );
6253 %}
6254 
6255 instruct vsub8S(vecX dst, vecX src) %{
6256   predicate(n->as_Vector()->length() == 8);
6257   match(Set dst (SubVS dst src));
6258   format %{ "psubw   $dst,$src\t! sub packed8S" %}
6259   ins_encode %{
6260     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
6261   %}
6262   ins_pipe( pipe_slow );
6263 %}
6264 
6265 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
6266   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6267   match(Set dst (SubVS src1 src2));
6268   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
6269   ins_encode %{
6270     int vector_len = 0;
6271     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6272   %}
6273   ins_pipe( pipe_slow );
6274 %}
6275 
6276 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
6277   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6278   match(Set dst (SubVS src (LoadVector mem)));
6279   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
6280   ins_encode %{
6281     int vector_len = 0;
6282     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6283   %}
6284   ins_pipe( pipe_slow );
6285 %}
6286 
6287 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
6288   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6289   match(Set dst (SubVS src1 src2));
6290   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
6291   ins_encode %{
6292     int vector_len = 1;
6293     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6294   %}
6295   ins_pipe( pipe_slow );
6296 %}
6297 
6298 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
6299   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6300   match(Set dst (SubVS src (LoadVector mem)));
6301   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
6302   ins_encode %{
6303     int vector_len = 1;
6304     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6305   %}
6306   ins_pipe( pipe_slow );
6307 %}
6308 
6309 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
6310   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6311   match(Set dst (SubVS src1 src2));
6312   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
6313   ins_encode %{
6314     int vector_len = 2;
6315     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6316   %}
6317   ins_pipe( pipe_slow );
6318 %}
6319 
6320 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
6321   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6322   match(Set dst (SubVS src (LoadVector mem)));
6323   format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
6324   ins_encode %{
6325     int vector_len = 2;
6326     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6327   %}
6328   ins_pipe( pipe_slow );
6329 %}
6330 
6331 // Integers vector sub
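     // The predicate ladder reflects the ISA: 128-bit AVX forms need only
     // UseAVX > 0, 256-bit integer forms need AVX2 (UseAVX > 1), and 512-bit
     // forms need AVX-512 (UseAVX > 2).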
6332 instruct vsub2I(vecD dst, vecD src) %{
6333   predicate(n->as_Vector()->length() == 2);
6334   match(Set dst (SubVI dst src));
6335   format %{ "psubd   $dst,$src\t! sub packed2I" %}
6336   ins_encode %{
6337     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
6338   %}
6339   ins_pipe( pipe_slow );
6340 %}
6341 
6342 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
6343   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6344   match(Set dst (SubVI src1 src2));
6345   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
6346   ins_encode %{
6347     int vector_len = 0;
6348     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6349   %}
6350   ins_pipe( pipe_slow );
6351 %}
6352 
6353 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
6354   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6355   match(Set dst (SubVI src (LoadVector mem)));
6356   format %{ "vpsubd  $dst,$src,$mem\t! sub packed2I" %}
6357   ins_encode %{
6358     int vector_len = 0;
6359     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6360   %}
6361   ins_pipe( pipe_slow );
6362 %}
6363 
6364 instruct vsub4I(vecX dst, vecX src) %{
6365   predicate(n->as_Vector()->length() == 4);
6366   match(Set dst (SubVI dst src));
6367   format %{ "psubd   $dst,$src\t! sub packed4I" %}
6368   ins_encode %{
6369     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
6370   %}
6371   ins_pipe( pipe_slow );
6372 %}
6373 
6374 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
6375   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6376   match(Set dst (SubVI src1 src2));
6377   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
6378   ins_encode %{
6379     int vector_len = 0;
6380     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6381   %}
6382   ins_pipe( pipe_slow );
6383 %}
6384 
6385 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
6386   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6387   match(Set dst (SubVI src (LoadVector mem)));
6388   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
6389   ins_encode %{
6390     int vector_len = 0;
6391     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6392   %}
6393   ins_pipe( pipe_slow );
6394 %}
6395 
6396 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
6397   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6398   match(Set dst (SubVI src1 src2));
6399   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
6400   ins_encode %{
6401     int vector_len = 1;
6402     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6403   %}
6404   ins_pipe( pipe_slow );
6405 %}
6406 
6407 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
6408   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6409   match(Set dst (SubVI src (LoadVector mem)));
6410   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
6411   ins_encode %{
6412     int vector_len = 1;
6413     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6414   %}
6415   ins_pipe( pipe_slow );
6416 %}
6417 
6418 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
6419   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6420   match(Set dst (SubVI src1 src2));
6421   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed16I" %}
6422   ins_encode %{
6423     int vector_len = 2;
6424     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6425   %}
6426   ins_pipe( pipe_slow );
6427 %}
6428 
6429 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
6430   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6431   match(Set dst (SubVI src (LoadVector mem)));
6432   format %{ "vpsubd  $dst,$src,$mem\t! sub packed16I" %}
6433   ins_encode %{
6434     int vector_len = 2;
6435     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6436   %}
6437   ins_pipe( pipe_slow );
6438 %}
6439 
6440 // Longs vector sub
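     // Two longs already fill a 128-bit register, so the smallest form here is
     // vecX; psubq/vpsubq subtract packed 64-bit lanes.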
6441 instruct vsub2L(vecX dst, vecX src) %{
6442   predicate(n->as_Vector()->length() == 2);
6443   match(Set dst (SubVL dst src));
6444   format %{ "psubq   $dst,$src\t! sub packed2L" %}
6445   ins_encode %{
6446     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
6447   %}
6448   ins_pipe( pipe_slow );
6449 %}
6450 
6451 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
6452   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6453   match(Set dst (SubVL src1 src2));
6454   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
6455   ins_encode %{
6456     int vector_len = 0;
6457     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6458   %}
6459   ins_pipe( pipe_slow );
6460 %}
6461 
6462 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
6463   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6464   match(Set dst (SubVL src (LoadVector mem)));
6465   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
6466   ins_encode %{
6467     int vector_len = 0;
6468     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6469   %}
6470   ins_pipe( pipe_slow );
6471 %}
6472 
6473 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
6474   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
6475   match(Set dst (SubVL src1 src2));
6476   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
6477   ins_encode %{
6478     int vector_len = 1;
6479     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6480   %}
6481   ins_pipe( pipe_slow );
6482 %}
6483 
6484 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
6485   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
6486   match(Set dst (SubVL src (LoadVector mem)));
6487   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
6488   ins_encode %{
6489     int vector_len = 1;
6490     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6491   %}
6492   ins_pipe( pipe_slow );
6493 %}
6494 
6495 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
6496   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6497   match(Set dst (SubVL src1 src2));
6498   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed8L" %}
6499   ins_encode %{
6500     int vector_len = 2;
6501     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6502   %}
6503   ins_pipe( pipe_slow );
6504 %}
6505 
6506 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
6507   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6508   match(Set dst (SubVL src (LoadVector mem)));
6509   format %{ "vpsubq  $dst,$src,$mem\t! sub packed8L" %}
6510   ins_encode %{
6511     int vector_len = 2;
6512     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6513   %}
6514   ins_pipe( pipe_slow );
6515 %}
6516 
6517 // Floats vector sub
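     // Unlike the integer forms, 256-bit packed-float arithmetic only needs
     // plain AVX (UseAVX > 0); the 512-bit vecZ forms still require AVX-512.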
6518 instruct vsub2F(vecD dst, vecD src) %{
6519   predicate(n->as_Vector()->length() == 2);
6520   match(Set dst (SubVF dst src));
6521   format %{ "subps   $dst,$src\t! sub packed2F" %}
6522   ins_encode %{
6523     __ subps($dst$$XMMRegister, $src$$XMMRegister);
6524   %}
6525   ins_pipe( pipe_slow );
6526 %}
6527 
6528 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
6529   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6530   match(Set dst (SubVF src1 src2));
6531   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
6532   ins_encode %{
6533     int vector_len = 0;
6534     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6535   %}
6536   ins_pipe( pipe_slow );
6537 %}
6538 
6539 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
6540   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6541   match(Set dst (SubVF src (LoadVector mem)));
6542   format %{ "vsubps  $dst,$src,$mem\t! sub packed2F" %}
6543   ins_encode %{
6544     int vector_len = 0;
6545     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6546   %}
6547   ins_pipe( pipe_slow );
6548 %}
6549 
6550 instruct vsub4F(vecX dst, vecX src) %{
6551   predicate(n->as_Vector()->length() == 4);
6552   match(Set dst (SubVF dst src));
6553   format %{ "subps   $dst,$src\t! sub packed4F" %}
6554   ins_encode %{
6555     __ subps($dst$$XMMRegister, $src$$XMMRegister);
6556   %}
6557   ins_pipe( pipe_slow );
6558 %}
6559 
6560 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
6561   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6562   match(Set dst (SubVF src1 src2));
6563   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
6564   ins_encode %{
6565     int vector_len = 0;
6566     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6567   %}
6568   ins_pipe( pipe_slow );
6569 %}
6570 
6571 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
6572   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6573   match(Set dst (SubVF src (LoadVector mem)));
6574   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
6575   ins_encode %{
6576     int vector_len = 0;
6577     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6578   %}
6579   ins_pipe( pipe_slow );
6580 %}
6581 
6582 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
6583   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6584   match(Set dst (SubVF src1 src2));
6585   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
6586   ins_encode %{
6587     int vector_len = 1;
6588     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6589   %}
6590   ins_pipe( pipe_slow );
6591 %}
6592 
6593 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
6594   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6595   match(Set dst (SubVF src (LoadVector mem)));
6596   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
6597   ins_encode %{
6598     int vector_len = 1;
6599     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6600   %}
6601   ins_pipe( pipe_slow );
6602 %}
6603 
6604 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6605   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6606   match(Set dst (SubVF src1 src2));
6607   format %{ "vsubps  $dst,$src1,$src2\t! sub packed16F" %}
6608   ins_encode %{
6609     int vector_len = 2;
6610     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6611   %}
6612   ins_pipe( pipe_slow );
6613 %}
6614 
6615 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
6616   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6617   match(Set dst (SubVF src (LoadVector mem)));
6618   format %{ "vsubps  $dst,$src,$mem\t! sub packed16F" %}
6619   ins_encode %{
6620     int vector_len = 2;
6621     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6622   %}
6623   ins_pipe( pipe_slow );
6624 %}
6625 
6626 // Doubles vector sub
6627 instruct vsub2D(vecX dst, vecX src) %{
6628   predicate(n->as_Vector()->length() == 2);
6629   match(Set dst (SubVD dst src));
6630   format %{ "subpd   $dst,$src\t! sub packed2D" %}
6631   ins_encode %{
6632     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
6633   %}
6634   ins_pipe( pipe_slow );
6635 %}
6636 
6637 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
6638   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6639   match(Set dst (SubVD src1 src2));
6640   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
6641   ins_encode %{
6642     int vector_len = 0;
6643     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6644   %}
6645   ins_pipe( pipe_slow );
6646 %}
6647 
6648 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
6649   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6650   match(Set dst (SubVD src (LoadVector mem)));
6651   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
6652   ins_encode %{
6653     int vector_len = 0;
6654     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6655   %}
6656   ins_pipe( pipe_slow );
6657 %}
6658 
6659 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
6660   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6661   match(Set dst (SubVD src1 src2));
6662   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
6663   ins_encode %{
6664     int vector_len = 1;
6665     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6666   %}
6667   ins_pipe( pipe_slow );
6668 %}
6669 
6670 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
6671   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6672   match(Set dst (SubVD src (LoadVector mem)));
6673   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
6674   ins_encode %{
6675     int vector_len = 1;
6676     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6677   %}
6678   ins_pipe( pipe_slow );
6679 %}
6680 
6681 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6682   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6683   match(Set dst (SubVD src1 src2));
6684   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed8D" %}
6685   ins_encode %{
6686     int vector_len = 2;
6687     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6688   %}
6689   ins_pipe( pipe_slow );
6690 %}
6691 
6692 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
6693   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6694   match(Set dst (SubVD src (LoadVector mem)));
6695   format %{ "vsubpd  $dst,$src,$mem\t! sub packed8D" %}
6696   ins_encode %{
6697     int vector_len = 2;
6698     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6699   %}
6700   ins_pipe( pipe_slow );
6701 %}
6702 
6703 // --------------------------------- MUL --------------------------------------
6704 
6705 // Shorts/Chars vector mul
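     // pmullw/vpmullw keep only the low 16 bits of each 16x16-bit product,
     // which is the truncated result a packed short/char multiply needs.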
6706 instruct vmul2S(vecS dst, vecS src) %{
6707   predicate(n->as_Vector()->length() == 2);
6708   match(Set dst (MulVS dst src));
6709   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
6710   ins_encode %{
6711     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6712   %}
6713   ins_pipe( pipe_slow );
6714 %}
6715 
6716 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
6717   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6718   match(Set dst (MulVS src1 src2));
6719   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
6720   ins_encode %{
6721     int vector_len = 0;
6722     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6723   %}
6724   ins_pipe( pipe_slow );
6725 %}
6726 
6727 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
6728   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6729   match(Set dst (MulVS src (LoadVector mem)));
6730   format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
6731   ins_encode %{
6732     int vector_len = 0;
6733     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6734   %}
6735   ins_pipe( pipe_slow );
6736 %}
6737 
6738 instruct vmul4S(vecD dst, vecD src) %{
6739   predicate(n->as_Vector()->length() == 4);
6740   match(Set dst (MulVS dst src));
6741   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
6742   ins_encode %{
6743     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6744   %}
6745   ins_pipe( pipe_slow );
6746 %}
6747 
6748 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
6749   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6750   match(Set dst (MulVS src1 src2));
6751   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
6752   ins_encode %{
6753     int vector_len = 0;
6754     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6755   %}
6756   ins_pipe( pipe_slow );
6757 %}
6758 
6759 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
6760   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6761   match(Set dst (MulVS src (LoadVector mem)));
6762   format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
6763   ins_encode %{
6764     int vector_len = 0;
6765     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6766   %}
6767   ins_pipe( pipe_slow );
6768 %}
6769 
6770 instruct vmul8S(vecX dst, vecX src) %{
6771   predicate(n->as_Vector()->length() == 8);
6772   match(Set dst (MulVS dst src));
6773   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
6774   ins_encode %{
6775     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6776   %}
6777   ins_pipe( pipe_slow );
6778 %}
6779 
6780 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
6781   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6782   match(Set dst (MulVS src1 src2));
6783   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
6784   ins_encode %{
6785     int vector_len = 0;
6786     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6787   %}
6788   ins_pipe( pipe_slow );
6789 %}
6790 
6791 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
6792   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6793   match(Set dst (MulVS src (LoadVector mem)));
6794   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
6795   ins_encode %{
6796     int vector_len = 0;
6797     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6798   %}
6799   ins_pipe( pipe_slow );
6800 %}
6801 
6802 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
6803   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6804   match(Set dst (MulVS src1 src2));
6805   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
6806   ins_encode %{
6807     int vector_len = 1;
6808     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6809   %}
6810   ins_pipe( pipe_slow );
6811 %}
6812 
6813 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
6814   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6815   match(Set dst (MulVS src (LoadVector mem)));
6816   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
6817   ins_encode %{
6818     int vector_len = 1;
6819     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6820   %}
6821   ins_pipe( pipe_slow );
6822 %}
6823 
6824 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
6825   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6826   match(Set dst (MulVS src1 src2));
6827   format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
6828   ins_encode %{
6829     int vector_len = 2;
6830     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6831   %}
6832   ins_pipe( pipe_slow );
6833 %}
6834 
6835 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
6836   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6837   match(Set dst (MulVS src (LoadVector mem)));
6838   format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
6839   ins_encode %{
6840     int vector_len = 2;
6841     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6842   %}
6843   ins_pipe( pipe_slow );
6844 %}
6845 
6846 // Integers vector mul (sse4_1)
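     // pmulld (packed 32x32 -> low 32 multiply) was introduced with SSE4.1,
     // hence the UseSSE > 3 predicate on the in-place forms.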
6847 instruct vmul2I(vecD dst, vecD src) %{
6848   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
6849   match(Set dst (MulVI dst src));
6850   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
6851   ins_encode %{
6852     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6853   %}
6854   ins_pipe( pipe_slow );
6855 %}
6856 
6857 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
6858   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6859   match(Set dst (MulVI src1 src2));
6860   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
6861   ins_encode %{
6862     int vector_len = 0;
6863     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6864   %}
6865   ins_pipe( pipe_slow );
6866 %}
6867 
6868 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
6869   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6870   match(Set dst (MulVI src (LoadVector mem)));
6871   format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
6872   ins_encode %{
6873     int vector_len = 0;
6874     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6875   %}
6876   ins_pipe( pipe_slow );
6877 %}
6878 
6879 instruct vmul4I(vecX dst, vecX src) %{
6880   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
6881   match(Set dst (MulVI dst src));
6882   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
6883   ins_encode %{
6884     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6885   %}
6886   ins_pipe( pipe_slow );
6887 %}
6888 
6889 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
6890   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6891   match(Set dst (MulVI src1 src2));
6892   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
6893   ins_encode %{
6894     int vector_len = 0;
6895     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6896   %}
6897   ins_pipe( pipe_slow );
6898 %}
6899 
6900 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
6901   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6902   match(Set dst (MulVI src (LoadVector mem)));
6903   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
6904   ins_encode %{
6905     int vector_len = 0;
6906     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6907   %}
6908   ins_pipe( pipe_slow );
6909 %}
6910 
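     // Packed 64-bit low multiply (vpmullq) is an AVX-512DQ instruction, so each
     // of the MulVL rules below also checks VM_Version::supports_avx512dq().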
6911 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
6912   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
6913   match(Set dst (MulVL src1 src2));
6914   format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
6915   ins_encode %{
6916     int vector_len = 0;
6917     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6918   %}
6919   ins_pipe( pipe_slow );
6920 %}
6921 
6922 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
6923   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
6924   match(Set dst (MulVL src (LoadVector mem)));
6925   format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
6926   ins_encode %{
6927     int vector_len = 0;
6928     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6929   %}
6930   ins_pipe( pipe_slow );
6931 %}
6932 
6933 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
6934   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
6935   match(Set dst (MulVL src1 src2));
6936   format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
6937   ins_encode %{
6938     int vector_len = 1;
6939     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6940   %}
6941   ins_pipe( pipe_slow );
6942 %}
6943 
6944 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
6945   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
6946   match(Set dst (MulVL src (LoadVector mem)));
6947   format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
6948   ins_encode %{
6949     int vector_len = 1;
6950     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6951   %}
6952   ins_pipe( pipe_slow );
6953 %}
6954 
6955 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
6956   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
6957   match(Set dst (MulVL src1 src2));
6958   format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
6959   ins_encode %{
6960     int vector_len = 2;
6961     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6962   %}
6963   ins_pipe( pipe_slow );
6964 %}
6965 
6966 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
6967   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
6968   match(Set dst (MulVL src (LoadVector mem)));
6969   format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
6970   ins_encode %{
6971     int vector_len = 2;
6972     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6973   %}
6974   ins_pipe( pipe_slow );
6975 %}
6976 
6977 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
6978   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6979   match(Set dst (MulVI src1 src2));
6980   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
6981   ins_encode %{
6982     int vector_len = 1;
6983     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6984   %}
6985   ins_pipe( pipe_slow );
6986 %}
6987 
6988 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
6989   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6990   match(Set dst (MulVI src (LoadVector mem)));
6991   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
6992   ins_encode %{
6993     int vector_len = 1;
6994     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6995   %}
6996   ins_pipe( pipe_slow );
6997 %}
6998 
6999 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
7000   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7001   match(Set dst (MulVI src1 src2));
7002   format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
7003   ins_encode %{
7004     int vector_len = 2;
7005     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7006   %}
7007   ins_pipe( pipe_slow );
7008 %}
7009 
7010 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
7011   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7012   match(Set dst (MulVI src (LoadVector mem)));
7013   format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
7014   ins_encode %{
7015     int vector_len = 2;
7016     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7017   %}
7018   ins_pipe( pipe_slow );
7019 %}
7020 
7021 // Floats vector mul
7022 instruct vmul2F(vecD dst, vecD src) %{
7023   predicate(n->as_Vector()->length() == 2);
7024   match(Set dst (MulVF dst src));
7025   format %{ "mulps   $dst,$src\t! mul packed2F" %}
7026   ins_encode %{
7027     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
7028   %}
7029   ins_pipe( pipe_slow );
7030 %}
7031 
7032 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
7033   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7034   match(Set dst (MulVF src1 src2));
7035   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
7036   ins_encode %{
7037     int vector_len = 0;
7038     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7039   %}
7040   ins_pipe( pipe_slow );
7041 %}
7042 
7043 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
7044   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7045   match(Set dst (MulVF src (LoadVector mem)));
7046   format %{ "vmulps  $dst,$src,$mem\t! mul packed2F" %}
7047   ins_encode %{
7048     int vector_len = 0;
7049     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7050   %}
7051   ins_pipe( pipe_slow );
7052 %}
7053 
7054 instruct vmul4F(vecX dst, vecX src) %{
7055   predicate(n->as_Vector()->length() == 4);
7056   match(Set dst (MulVF dst src));
7057   format %{ "mulps   $dst,$src\t! mul packed4F" %}
7058   ins_encode %{
7059     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
7060   %}
7061   ins_pipe( pipe_slow );
7062 %}
7063 
7064 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
7065   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7066   match(Set dst (MulVF src1 src2));
7067   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
7068   ins_encode %{
7069     int vector_len = 0;
7070     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7071   %}
7072   ins_pipe( pipe_slow );
7073 %}
7074 
7075 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
7076   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7077   match(Set dst (MulVF src (LoadVector mem)));
7078   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
7079   ins_encode %{
7080     int vector_len = 0;
7081     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7082   %}
7083   ins_pipe( pipe_slow );
7084 %}
7085 
7086 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
7087   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7088   match(Set dst (MulVF src1 src2));
7089   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
7090   ins_encode %{
7091     int vector_len = 1;
7092     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7093   %}
7094   ins_pipe( pipe_slow );
7095 %}
7096 
7097 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
7098   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7099   match(Set dst (MulVF src (LoadVector mem)));
7100   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
7101   ins_encode %{
7102     int vector_len = 1;
7103     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7104   %}
7105   ins_pipe( pipe_slow );
7106 %}
7107 
7108 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
7109   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7110   match(Set dst (MulVF src1 src2));
7111   format %{ "vmulps  $dst,$src1,$src2\t! mul packed16F" %}
7112   ins_encode %{
7113     int vector_len = 2;
7114     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7115   %}
7116   ins_pipe( pipe_slow );
7117 %}
7118 
7119 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
7120   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7121   match(Set dst (MulVF src (LoadVector mem)));
7122   format %{ "vmulps  $dst,$src,$mem\t! mul packed16F" %}
7123   ins_encode %{
7124     int vector_len = 2;
7125     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7126   %}
7127   ins_pipe( pipe_slow );
7128 %}
7129 
7130 // Doubles vector mul
7131 instruct vmul2D(vecX dst, vecX src) %{
7132   predicate(n->as_Vector()->length() == 2);
7133   match(Set dst (MulVD dst src));
7134   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
7135   ins_encode %{
7136     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
7137   %}
7138   ins_pipe( pipe_slow );
7139 %}
7140 
7141 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
7142   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7143   match(Set dst (MulVD src1 src2));
7144   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
7145   ins_encode %{
7146     int vector_len = 0;
7147     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7148   %}
7149   ins_pipe( pipe_slow );
7150 %}
7151 
7152 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
7153   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7154   match(Set dst (MulVD src (LoadVector mem)));
7155   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
7156   ins_encode %{
7157     int vector_len = 0;
7158     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7159   %}
7160   ins_pipe( pipe_slow );
7161 %}
7162 
7163 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
7164   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7165   match(Set dst (MulVD src1 src2));
7166   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
7167   ins_encode %{
7168     int vector_len = 1;
7169     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7170   %}
7171   ins_pipe( pipe_slow );
7172 %}
7173 
7174 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
7175   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7176   match(Set dst (MulVD src (LoadVector mem)));
7177   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
7178   ins_encode %{
7179     int vector_len = 1;
7180     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7181   %}
7182   ins_pipe( pipe_slow );
7183 %}
7184 
7185 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
7186   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7187   match(Set dst (MulVD src1 src2));
7188   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed8D" %}
7189   ins_encode %{
7190     int vector_len = 2;
7191     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7192   %}
7193   ins_pipe( pipe_slow );
7194 %}
7195 
7196 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
7197   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7198   match(Set dst (MulVD src (LoadVector mem)));
7199   format %{ "vmulpd  $dst,$src,$mem\t! mul packed8D" %}
7200   ins_encode %{
7201     int vector_len = 2;
7202     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7203   %}
7204   ins_pipe( pipe_slow );
7205 %}
7206 
7207 // --------------------------------- DIV --------------------------------------
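     // Only floating-point vector division is matched here: x86 has no packed
     // integer divide instruction.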
7208 
7209 // Floats vector div
7210 instruct vdiv2F(vecD dst, vecD src) %{
7211   predicate(n->as_Vector()->length() == 2);
7212   match(Set dst (DivVF dst src));
7213   format %{ "divps   $dst,$src\t! div packed2F" %}
7214   ins_encode %{
7215     __ divps($dst$$XMMRegister, $src$$XMMRegister);
7216   %}
7217   ins_pipe( pipe_slow );
7218 %}
7219 
7220 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
7221   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7222   match(Set dst (DivVF src1 src2));
7223   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
7224   ins_encode %{
7225     int vector_len = 0;
7226     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7227   %}
7228   ins_pipe( pipe_slow );
7229 %}
7230 
7231 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
7232   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7233   match(Set dst (DivVF src (LoadVector mem)));
7234   format %{ "vdivps  $dst,$src,$mem\t! div packed2F" %}
7235   ins_encode %{
7236     int vector_len = 0;
7237     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7238   %}
7239   ins_pipe( pipe_slow );
7240 %}
7241 
7242 instruct vdiv4F(vecX dst, vecX src) %{
7243   predicate(n->as_Vector()->length() == 4);
7244   match(Set dst (DivVF dst src));
7245   format %{ "divps   $dst,$src\t! div packed4F" %}
7246   ins_encode %{
7247     __ divps($dst$$XMMRegister, $src$$XMMRegister);
7248   %}
7249   ins_pipe( pipe_slow );
7250 %}
7251 
7252 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
7253   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7254   match(Set dst (DivVF src1 src2));
7255   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
7256   ins_encode %{
7257     int vector_len = 0;
7258     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7259   %}
7260   ins_pipe( pipe_slow );
7261 %}
7262 
7263 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
7264   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7265   match(Set dst (DivVF src (LoadVector mem)));
7266   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
7267   ins_encode %{
7268     int vector_len = 0;
7269     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7270   %}
7271   ins_pipe( pipe_slow );
7272 %}
7273 
7274 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
7275   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7276   match(Set dst (DivVF src1 src2));
7277   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
7278   ins_encode %{
7279     int vector_len = 1;
7280     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7281   %}
7282   ins_pipe( pipe_slow );
7283 %}
7284 
7285 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
7286   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7287   match(Set dst (DivVF src (LoadVector mem)));
7288   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
7289   ins_encode %{
7290     int vector_len = 1;
7291     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7292   %}
7293   ins_pipe( pipe_slow );
7294 %}
7295 
7296 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
7297   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7298   match(Set dst (DivVF src1 src2));
7299   format %{ "vdivps  $dst,$src1,$src2\t! div packed16F" %}
7300   ins_encode %{
7301     int vector_len = 2;
7302     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7303   %}
7304   ins_pipe( pipe_slow );
7305 %}
7306 
7307 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
7308   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7309   match(Set dst (DivVF src (LoadVector mem)));
7310   format %{ "vdivps  $dst,$src,$mem\t! div packed16F" %}
7311   ins_encode %{
7312     int vector_len = 2;
7313     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7314   %}
7315   ins_pipe( pipe_slow );
7316 %}
7317 
7318 // Doubles vector div
7319 instruct vdiv2D(vecX dst, vecX src) %{
7320   predicate(n->as_Vector()->length() == 2);
7321   match(Set dst (DivVD dst src));
7322   format %{ "divpd   $dst,$src\t! div packed2D" %}
7323   ins_encode %{
7324     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
7325   %}
7326   ins_pipe( pipe_slow );
7327 %}
7328 
7329 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
7330   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7331   match(Set dst (DivVD src1 src2));
7332   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
7333   ins_encode %{
7334     int vector_len = 0;
7335     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7336   %}
7337   ins_pipe( pipe_slow );
7338 %}
7339 
7340 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
7341   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7342   match(Set dst (DivVD src (LoadVector mem)));
7343   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
7344   ins_encode %{
7345     int vector_len = 0;
7346     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7347   %}
7348   ins_pipe( pipe_slow );
7349 %}
7350 
7351 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
7352   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7353   match(Set dst (DivVD src1 src2));
7354   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
7355   ins_encode %{
7356     int vector_len = 1;
7357     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7358   %}
7359   ins_pipe( pipe_slow );
7360 %}
7361 
7362 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
7363   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7364   match(Set dst (DivVD src (LoadVector mem)));
7365   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
7366   ins_encode %{
7367     int vector_len = 1;
7368     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7369   %}
7370   ins_pipe( pipe_slow );
7371 %}
7372 
7373 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
7374   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7375   match(Set dst (DivVD src1 src2));
7376   format %{ "vdivpd  $dst,$src1,$src2\t! div packed8D" %}
7377   ins_encode %{
7378     int vector_len = 2;
7379     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7380   %}
7381   ins_pipe( pipe_slow );
7382 %}
7383 
7384 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
7385   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7386   match(Set dst (DivVD src (LoadVector mem)));
7387   format %{ "vdivpd  $dst,$src,$mem\t! div packed8D" %}
7388   ins_encode %{
7389     int vector_len = 2;
7390     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7391   %}
7392   ins_pipe( pipe_slow );
7393 %}
7394 
7395 // ------------------------------ Shift ---------------------------------------
7396 
7397 // Left and right shift count vectors are the same on x86
7398 // (only lowest bits of xmm reg are used for count).
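     // The hardware reads a variable shift count from the low 64 bits of an XMM
     // register, so a single movdl of the scalar count works for every element size.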
7399 instruct vshiftcnt(vecS dst, rRegI cnt) %{
7400   match(Set dst (LShiftCntV cnt));
7401   match(Set dst (RShiftCntV cnt));
7402   format %{ "movd    $dst,$cnt\t! load shift count" %}
7403   ins_encode %{
7404     __ movdl($dst$$XMMRegister, $cnt$$Register);
7405   %}
7406   ins_pipe( pipe_slow );
7407 %}
7408 
7409 // ------------------------------ LeftShift -----------------------------------
7410 
7411 // Shorts/Chars vector left shift
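     // Each size has two flavors: a variable count held in an XMM register
     // (vecS shift) and a constant count using the instruction's 8-bit
     // immediate encoding (immI8 shift).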
7412 instruct vsll2S(vecS dst, vecS shift) %{
7413   predicate(n->as_Vector()->length() == 2);
7414   match(Set dst (LShiftVS dst shift));
7415   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
7416   ins_encode %{
7417     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
7418   %}
7419   ins_pipe( pipe_slow );
7420 %}
7421 
7422 instruct vsll2S_imm(vecS dst, immI8 shift) %{
7423   predicate(n->as_Vector()->length() == 2);
7424   match(Set dst (LShiftVS dst shift));
7425   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
7426   ins_encode %{
7427     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
7428   %}
7429   ins_pipe( pipe_slow );
7430 %}
7431 
7432 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
7433   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7434   match(Set dst (LShiftVS src shift));
7435   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
7436   ins_encode %{
7437     int vector_len = 0;
7438     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7439   %}
7440   ins_pipe( pipe_slow );
7441 %}
7442 
7443 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
7444   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7445   match(Set dst (LShiftVS src shift));
7446   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
7447   ins_encode %{
7448     int vector_len = 0;
7449     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7450   %}
7451   ins_pipe( pipe_slow );
7452 %}
7453 
7454 instruct vsll4S(vecD dst, vecS shift) %{
7455   predicate(n->as_Vector()->length() == 4);
7456   match(Set dst (LShiftVS dst shift));
7457   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
7458   ins_encode %{
7459     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
7460   %}
7461   ins_pipe( pipe_slow );
7462 %}
7463 
7464 instruct vsll4S_imm(vecD dst, immI8 shift) %{
7465   predicate(n->as_Vector()->length() == 4);
7466   match(Set dst (LShiftVS dst shift));
7467   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
7468   ins_encode %{
7469     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
7470   %}
7471   ins_pipe( pipe_slow );
7472 %}
7473 
7474 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
7475   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7476   match(Set dst (LShiftVS src shift));
7477   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
7478   ins_encode %{
7479     int vector_len = 0;
7480     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7481   %}
7482   ins_pipe( pipe_slow );
7483 %}
7484 
7485 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
7486   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7487   match(Set dst (LShiftVS src shift));
7488   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
7489   ins_encode %{
7490     int vector_len = 0;
7491     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7492   %}
7493   ins_pipe( pipe_slow );
7494 %}
7495 
7496 instruct vsll8S(vecX dst, vecS shift) %{
7497   predicate(n->as_Vector()->length() == 8);
7498   match(Set dst (LShiftVS dst shift));
7499   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
7500   ins_encode %{
7501     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
7502   %}
7503   ins_pipe( pipe_slow );
7504 %}
7505 
7506 instruct vsll8S_imm(vecX dst, immI8 shift) %{
7507   predicate(n->as_Vector()->length() == 8);
7508   match(Set dst (LShiftVS dst shift));
7509   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
7510   ins_encode %{
7511     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
7512   %}
7513   ins_pipe( pipe_slow );
7514 %}
7515 
7516 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
7517   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7518   match(Set dst (LShiftVS src shift));
7519   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
7520   ins_encode %{
7521     int vector_len = 0;
7522     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7523   %}
7524   ins_pipe( pipe_slow );
7525 %}
7526 
7527 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
7528   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7529   match(Set dst (LShiftVS src shift));
7530   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
7531   ins_encode %{
7532     int vector_len = 0;
7533     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7534   %}
7535   ins_pipe( pipe_slow );
7536 %}
7537 
7538 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
7539   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7540   match(Set dst (LShiftVS src shift));
7541   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
7542   ins_encode %{
7543     int vector_len = 1;
7544     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7545   %}
7546   ins_pipe( pipe_slow );
7547 %}
7548 
7549 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
7550   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7551   match(Set dst (LShiftVS src shift));
7552   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
7553   ins_encode %{
7554     int vector_len = 1;
7555     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7556   %}
7557   ins_pipe( pipe_slow );
7558 %}
7559 
7560 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
7561   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7562   match(Set dst (LShiftVS src shift));
7563   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
7564   ins_encode %{
7565     int vector_len = 2;
7566     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7567   %}
7568   ins_pipe( pipe_slow );
7569 %}
7570 
7571 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7572   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7573   match(Set dst (LShiftVS src shift));
7574   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
7575   ins_encode %{
7576     int vector_len = 2;
7577     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7578   %}
7579   ins_pipe( pipe_slow );
7580 %}
7581 
7582 // Integers vector left shift
7583 instruct vsll2I(vecD dst, vecS shift) %{
7584   predicate(n->as_Vector()->length() == 2);
7585   match(Set dst (LShiftVI dst shift));
7586   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
7587   ins_encode %{
7588     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
7589   %}
7590   ins_pipe( pipe_slow );
7591 %}
7592 
7593 instruct vsll2I_imm(vecD dst, immI8 shift) %{
7594   predicate(n->as_Vector()->length() == 2);
7595   match(Set dst (LShiftVI dst shift));
7596   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
7597   ins_encode %{
7598     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
7599   %}
7600   ins_pipe( pipe_slow );
7601 %}
7602 
7603 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
7604   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7605   match(Set dst (LShiftVI src shift));
7606   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
7607   ins_encode %{
7608     int vector_len = 0;
7609     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7610   %}
7611   ins_pipe( pipe_slow );
7612 %}
7613 
7614 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
7615   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7616   match(Set dst (LShiftVI src shift));
7617   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
7618   ins_encode %{
7619     int vector_len = 0;
7620     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7621   %}
7622   ins_pipe( pipe_slow );
7623 %}
7624 
7625 instruct vsll4I(vecX dst, vecS shift) %{
7626   predicate(n->as_Vector()->length() == 4);
7627   match(Set dst (LShiftVI dst shift));
7628   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
7629   ins_encode %{
7630     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
7631   %}
7632   ins_pipe( pipe_slow );
7633 %}
7634 
7635 instruct vsll4I_imm(vecX dst, immI8 shift) %{
7636   predicate(n->as_Vector()->length() == 4);
7637   match(Set dst (LShiftVI dst shift));
7638   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
7639   ins_encode %{
7640     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
7641   %}
7642   ins_pipe( pipe_slow );
7643 %}
7644 
7645 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
7646   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7647   match(Set dst (LShiftVI src shift));
7648   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
7649   ins_encode %{
7650     int vector_len = 0;
7651     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7652   %}
7653   ins_pipe( pipe_slow );
7654 %}
7655 
7656 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
7657   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7658   match(Set dst (LShiftVI src shift));
7659   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
7660   ins_encode %{
7661     int vector_len = 0;
7662     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7663   %}
7664   ins_pipe( pipe_slow );
7665 %}
7666 
7667 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
7668   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7669   match(Set dst (LShiftVI src shift));
7670   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
7671   ins_encode %{
7672     int vector_len = 1;
7673     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7674   %}
7675   ins_pipe( pipe_slow );
7676 %}
7677 
7678 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
7679   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7680   match(Set dst (LShiftVI src shift));
7681   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
7682   ins_encode %{
7683     int vector_len = 1;
7684     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7685   %}
7686   ins_pipe( pipe_slow );
7687 %}
7688 
7689 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
7690   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7691   match(Set dst (LShiftVI src shift));
7692   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
7693   ins_encode %{
7694     int vector_len = 2;
7695     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7696   %}
7697   ins_pipe( pipe_slow );
7698 %}
7699 
7700 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7701   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7702   match(Set dst (LShiftVI src shift));
7703   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
7704   ins_encode %{
7705     int vector_len = 2;
7706     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7707   %}
7708   ins_pipe( pipe_slow );
7709 %}
7710 
7711 // Longs vector left shift
7712 instruct vsll2L(vecX dst, vecS shift) %{
7713   predicate(n->as_Vector()->length() == 2);
7714   match(Set dst (LShiftVL dst shift));
7715   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
7716   ins_encode %{
7717     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
7718   %}
7719   ins_pipe( pipe_slow );
7720 %}
7721 
7722 instruct vsll2L_imm(vecX dst, immI8 shift) %{
7723   predicate(n->as_Vector()->length() == 2);
7724   match(Set dst (LShiftVL dst shift));
7725   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
7726   ins_encode %{
7727     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
7728   %}
7729   ins_pipe( pipe_slow );
7730 %}
7731 
7732 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
7733   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7734   match(Set dst (LShiftVL src shift));
7735   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
7736   ins_encode %{
7737     int vector_len = 0;
7738     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7739   %}
7740   ins_pipe( pipe_slow );
7741 %}
7742 
7743 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
7744   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7745   match(Set dst (LShiftVL src shift));
7746   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
7747   ins_encode %{
7748     int vector_len = 0;
7749     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7750   %}
7751   ins_pipe( pipe_slow );
7752 %}
7753 
7754 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
7755   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7756   match(Set dst (LShiftVL src shift));
7757   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
7758   ins_encode %{
7759     int vector_len = 1;
7760     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7761   %}
7762   ins_pipe( pipe_slow );
7763 %}
7764 
7765 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
7766   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7767   match(Set dst (LShiftVL src shift));
7768   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
7769   ins_encode %{
7770     int vector_len = 1;
7771     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7772   %}
7773   ins_pipe( pipe_slow );
7774 %}
7775 
7776 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
7777   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7778   match(Set dst (LShiftVL src shift));
7779   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
7780   ins_encode %{
7781     int vector_len = 2;
7782     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7783   %}
7784   ins_pipe( pipe_slow );
7785 %}
7786 
7787 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7788   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7789   match(Set dst (LShiftVL src shift));
7790   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
7791   ins_encode %{
7792     int vector_len = 2;
7793     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7794   %}
7795   ins_pipe( pipe_slow );
7796 %}
7797 
7798 // ----------------------- LogicalRightShift -----------------------------------
7799 
7800 // Shorts vector logical right shift produces an incorrect Java result
7801 // for negative data because Java code converts the short value into an int
7802 // with sign extension before the shift. But char vectors are fine since
7803 // chars are unsigned values.
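     //
     // A minimal Java sketch of the mismatch (hypothetical user code, not taken
     // from this file):
     //
     //   static void urshift3(short[] a) {
     //     for (int i = 0; i < a.length; i++) {
     //       a[i] = (short)(a[i] >>> 3);   // short is sign-extended to int first
     //     }
     //   }
     //
     // A lane holding (short)-1 is widened to 0xFFFFFFFF, shifted to 0x1FFFFFFF
     // and truncated back to (short)0xFFFF == -1, while a 16-bit psrlw lane
     // would produce 0x1FFF. For char data the widening is a zero extension, so
     // the 16-bit lane result matches the scalar result.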
7804 
7805 instruct vsrl2S(vecS dst, vecS shift) %{
7806   predicate(n->as_Vector()->length() == 2);
7807   match(Set dst (URShiftVS dst shift));
7808   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
7809   ins_encode %{
7810     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7811   %}
7812   ins_pipe( pipe_slow );
7813 %}
7814 
7815 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
7816   predicate(n->as_Vector()->length() == 2);
7817   match(Set dst (URShiftVS dst shift));
7818   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
7819   ins_encode %{
7820     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7821   %}
7822   ins_pipe( pipe_slow );
7823 %}
7824 
7825 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
7826   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7827   match(Set dst (URShiftVS src shift));
7828   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
7829   ins_encode %{
7830     int vector_len = 0;
7831     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7832   %}
7833   ins_pipe( pipe_slow );
7834 %}
7835 
7836 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
7837   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7838   match(Set dst (URShiftVS src shift));
7839   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
7840   ins_encode %{
7841     int vector_len = 0;
7842     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7843   %}
7844   ins_pipe( pipe_slow );
7845 %}
7846 
7847 instruct vsrl4S(vecD dst, vecS shift) %{
7848   predicate(n->as_Vector()->length() == 4);
7849   match(Set dst (URShiftVS dst shift));
7850   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
7851   ins_encode %{
7852     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7853   %}
7854   ins_pipe( pipe_slow );
7855 %}
7856 
7857 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
7858   predicate(n->as_Vector()->length() == 4);
7859   match(Set dst (URShiftVS dst shift));
7860   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
7861   ins_encode %{
7862     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7863   %}
7864   ins_pipe( pipe_slow );
7865 %}
7866 
7867 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
7868   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7869   match(Set dst (URShiftVS src shift));
7870   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
7871   ins_encode %{
7872     int vector_len = 0;
7873     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7874   %}
7875   ins_pipe( pipe_slow );
7876 %}
7877 
7878 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
7879   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7880   match(Set dst (URShiftVS src shift));
7881   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
7882   ins_encode %{
7883     int vector_len = 0;
7884     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7885   %}
7886   ins_pipe( pipe_slow );
7887 %}
7888 
7889 instruct vsrl8S(vecX dst, vecS shift) %{
7890   predicate(n->as_Vector()->length() == 8);
7891   match(Set dst (URShiftVS dst shift));
7892   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
7893   ins_encode %{
7894     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7895   %}
7896   ins_pipe( pipe_slow );
7897 %}
7898 
7899 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
7900   predicate(n->as_Vector()->length() == 8);
7901   match(Set dst (URShiftVS dst shift));
7902   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
7903   ins_encode %{
7904     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7905   %}
7906   ins_pipe( pipe_slow );
7907 %}
7908 
7909 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
7910   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7911   match(Set dst (URShiftVS src shift));
7912   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
7913   ins_encode %{
7914     int vector_len = 0;
7915     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7916   %}
7917   ins_pipe( pipe_slow );
7918 %}
7919 
7920 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
7921   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7922   match(Set dst (URShiftVS src shift));
7923   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
7924   ins_encode %{
7925     int vector_len = 0;
7926     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7927   %}
7928   ins_pipe( pipe_slow );
7929 %}
7930 
7931 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
7932   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7933   match(Set dst (URShiftVS src shift));
7934   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
7935   ins_encode %{
7936     int vector_len = 1;
7937     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7938   %}
7939   ins_pipe( pipe_slow );
7940 %}
7941 
7942 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
7943   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7944   match(Set dst (URShiftVS src shift));
7945   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
7946   ins_encode %{
7947     int vector_len = 1;
7948     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7949   %}
7950   ins_pipe( pipe_slow );
7951 %}
7952 
7953 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
7954   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7955   match(Set dst (URShiftVS src shift));
7956   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
7957   ins_encode %{
7958     int vector_len = 2;
7959     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7960   %}
7961   ins_pipe( pipe_slow );
7962 %}
7963 
7964 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7965   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7966   match(Set dst (URShiftVS src shift));
7967   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
7968   ins_encode %{
7969     int vector_len = 2;
7970     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7971   %}
7972   ins_pipe( pipe_slow );
7973 %}
7974 
7975 // Integers vector logical right shift
7976 instruct vsrl2I(vecD dst, vecS shift) %{
7977   predicate(n->as_Vector()->length() == 2);
7978   match(Set dst (URShiftVI dst shift));
7979   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
7980   ins_encode %{
7981     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
7982   %}
7983   ins_pipe( pipe_slow );
7984 %}
7985 
7986 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
7987   predicate(n->as_Vector()->length() == 2);
7988   match(Set dst (URShiftVI dst shift));
7989   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
7990   ins_encode %{
7991     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
7992   %}
7993   ins_pipe( pipe_slow );
7994 %}
7995 
7996 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
7997   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7998   match(Set dst (URShiftVI src shift));
7999   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
8000   ins_encode %{
8001     int vector_len = 0;
8002     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8003   %}
8004   ins_pipe( pipe_slow );
8005 %}
8006 
8007 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
8008   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8009   match(Set dst (URShiftVI src shift));
8010   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
8011   ins_encode %{
8012     int vector_len = 0;
8013     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8014   %}
8015   ins_pipe( pipe_slow );
8016 %}
8017 
8018 instruct vsrl4I(vecX dst, vecS shift) %{
8019   predicate(n->as_Vector()->length() == 4);
8020   match(Set dst (URShiftVI dst shift));
8021   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
8022   ins_encode %{
8023     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
8024   %}
8025   ins_pipe( pipe_slow );
8026 %}
8027 
8028 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
8029   predicate(n->as_Vector()->length() == 4);
8030   match(Set dst (URShiftVI dst shift));
8031   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
8032   ins_encode %{
8033     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
8034   %}
8035   ins_pipe( pipe_slow );
8036 %}
8037 
8038 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
8039   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8040   match(Set dst (URShiftVI src shift));
8041   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
8042   ins_encode %{
8043     int vector_len = 0;
8044     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8045   %}
8046   ins_pipe( pipe_slow );
8047 %}
8048 
8049 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
8050   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8051   match(Set dst (URShiftVI src shift));
8052   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
8053   ins_encode %{
8054     int vector_len = 0;
8055     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8056   %}
8057   ins_pipe( pipe_slow );
8058 %}
8059 
8060 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
8061   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8062   match(Set dst (URShiftVI src shift));
8063   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
8064   ins_encode %{
8065     int vector_len = 1;
8066     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8067   %}
8068   ins_pipe( pipe_slow );
8069 %}
8070 
8071 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
8072   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8073   match(Set dst (URShiftVI src shift));
8074   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
8075   ins_encode %{
8076     int vector_len = 1;
8077     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8078   %}
8079   ins_pipe( pipe_slow );
8080 %}
8081 
8082 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
8083   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8084   match(Set dst (URShiftVI src shift));
8085   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
8086   ins_encode %{
8087     int vector_len = 2;
8088     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8089   %}
8090   ins_pipe( pipe_slow );
8091 %}
8092 
8093 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8094   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8095   match(Set dst (URShiftVI src shift));
8096   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
8097   ins_encode %{
8098     int vector_len = 2;
8099     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8100   %}
8101   ins_pipe( pipe_slow );
8102 %}
8103 
8104 // Longs vector logical right shift
8105 instruct vsrl2L(vecX dst, vecS shift) %{
8106   predicate(n->as_Vector()->length() == 2);
8107   match(Set dst (URShiftVL dst shift));
8108   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
8109   ins_encode %{
8110     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
8111   %}
8112   ins_pipe( pipe_slow );
8113 %}
8114 
8115 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
8116   predicate(n->as_Vector()->length() == 2);
8117   match(Set dst (URShiftVL dst shift));
8118   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
8119   ins_encode %{
8120     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
8121   %}
8122   ins_pipe( pipe_slow );
8123 %}
8124 
8125 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
8126   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8127   match(Set dst (URShiftVL src shift));
8128   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
8129   ins_encode %{
8130     int vector_len = 0;
8131     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8132   %}
8133   ins_pipe( pipe_slow );
8134 %}
8135 
8136 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
8137   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8138   match(Set dst (URShiftVL src shift));
8139   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
8140   ins_encode %{
8141     int vector_len = 0;
8142     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8143   %}
8144   ins_pipe( pipe_slow );
8145 %}
8146 
8147 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
8148   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8149   match(Set dst (URShiftVL src shift));
8150   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
8151   ins_encode %{
8152     int vector_len = 1;
8153     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8154   %}
8155   ins_pipe( pipe_slow );
8156 %}
8157 
8158 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
8159   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8160   match(Set dst (URShiftVL src shift));
8161   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
8162   ins_encode %{
8163     int vector_len = 1;
8164     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8165   %}
8166   ins_pipe( pipe_slow );
8167 %}
8168 
8169 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
8170   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8171   match(Set dst (URShiftVL src shift));
8172   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
8173   ins_encode %{
8174     int vector_len = 2;
8175     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8176   %}
8177   ins_pipe( pipe_slow );
8178 %}
8179 
8180 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8181   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8182   match(Set dst (URShiftVL src shift));
8183   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
8184   ins_encode %{
8185     int vector_len = 2;
8186     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8187   %}
8188   ins_pipe( pipe_slow );
8189 %}
8190 
8191 // ------------------- ArithmeticRightShift -----------------------------------
8192 
8193 // Shorts/Chars vector arithmetic right shift
8194 instruct vsra2S(vecS dst, vecS shift) %{
8195   predicate(n->as_Vector()->length() == 2);
8196   match(Set dst (RShiftVS dst shift));
8197   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
8198   ins_encode %{
8199     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
8200   %}
8201   ins_pipe( pipe_slow );
8202 %}
8203 
8204 instruct vsra2S_imm(vecS dst, immI8 shift) %{
8205   predicate(n->as_Vector()->length() == 2);
8206   match(Set dst (RShiftVS dst shift));
8207   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
8208   ins_encode %{
8209     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
8210   %}
8211   ins_pipe( pipe_slow );
8212 %}
8213 
8214 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
8215   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8216   match(Set dst (RShiftVS src shift));
8217   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
8218   ins_encode %{
8219     int vector_len = 0;
8220     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8221   %}
8222   ins_pipe( pipe_slow );
8223 %}
8224 
8225 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
8226   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8227   match(Set dst (RShiftVS src shift));
8228   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
8229   ins_encode %{
8230     int vector_len = 0;
8231     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8232   %}
8233   ins_pipe( pipe_slow );
8234 %}
8235 
8236 instruct vsra4S(vecD dst, vecS shift) %{
8237   predicate(n->as_Vector()->length() == 4);
8238   match(Set dst (RShiftVS dst shift));
8239   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
8240   ins_encode %{
8241     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
8242   %}
8243   ins_pipe( pipe_slow );
8244 %}
8245 
8246 instruct vsra4S_imm(vecD dst, immI8 shift) %{
8247   predicate(n->as_Vector()->length() == 4);
8248   match(Set dst (RShiftVS dst shift));
8249   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
8250   ins_encode %{
8251     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
8252   %}
8253   ins_pipe( pipe_slow );
8254 %}
8255 
8256 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
8257   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8258   match(Set dst (RShiftVS src shift));
8259   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
8260   ins_encode %{
8261     int vector_len = 0;
8262     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8263   %}
8264   ins_pipe( pipe_slow );
8265 %}
8266 
8267 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
8268   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8269   match(Set dst (RShiftVS src shift));
8270   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
8271   ins_encode %{
8272     int vector_len = 0;
8273     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8274   %}
8275   ins_pipe( pipe_slow );
8276 %}
8277 
8278 instruct vsra8S(vecX dst, vecS shift) %{
8279   predicate(n->as_Vector()->length() == 8);
8280   match(Set dst (RShiftVS dst shift));
8281   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
8282   ins_encode %{
8283     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
8284   %}
8285   ins_pipe( pipe_slow );
8286 %}
8287 
8288 instruct vsra8S_imm(vecX dst, immI8 shift) %{
8289   predicate(n->as_Vector()->length() == 8);
8290   match(Set dst (RShiftVS dst shift));
8291   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
8292   ins_encode %{
8293     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
8294   %}
8295   ins_pipe( pipe_slow );
8296 %}
8297 
8298 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
8299   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8300   match(Set dst (RShiftVS src shift));
8301   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
8302   ins_encode %{
8303     int vector_len = 0;
8304     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8305   %}
8306   ins_pipe( pipe_slow );
8307 %}
8308 
8309 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
8310   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8311   match(Set dst (RShiftVS src shift));
8312   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
8313   ins_encode %{
8314     int vector_len = 0;
8315     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8316   %}
8317   ins_pipe( pipe_slow );
8318 %}
8319 
8320 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
8321   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8322   match(Set dst (RShiftVS src shift));
8323   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
8324   ins_encode %{
8325     int vector_len = 1;
8326     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8327   %}
8328   ins_pipe( pipe_slow );
8329 %}
8330 
8331 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
8332   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8333   match(Set dst (RShiftVS src shift));
8334   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
8335   ins_encode %{
8336     int vector_len = 1;
8337     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8338   %}
8339   ins_pipe( pipe_slow );
8340 %}
8341 
8342 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
8343   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
8344   match(Set dst (RShiftVS src shift));
8345   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
8346   ins_encode %{
8347     int vector_len = 2;
8348     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8349   %}
8350   ins_pipe( pipe_slow );
8351 %}
8352 
8353 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8354   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
8355   match(Set dst (RShiftVS src shift));
8356   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
8357   ins_encode %{
8358     int vector_len = 2;
8359     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8360   %}
8361   ins_pipe( pipe_slow );
8362 %}
8363 
8364 // Integers vector arithmetic right shift
8365 instruct vsra2I(vecD dst, vecS shift) %{
8366   predicate(n->as_Vector()->length() == 2);
8367   match(Set dst (RShiftVI dst shift));
8368   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
8369   ins_encode %{
8370     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
8371   %}
8372   ins_pipe( pipe_slow );
8373 %}
8374 
8375 instruct vsra2I_imm(vecD dst, immI8 shift) %{
8376   predicate(n->as_Vector()->length() == 2);
8377   match(Set dst (RShiftVI dst shift));
8378   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
8379   ins_encode %{
8380     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
8381   %}
8382   ins_pipe( pipe_slow );
8383 %}
8384 
8385 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
8386   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8387   match(Set dst (RShiftVI src shift));
8388   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
8389   ins_encode %{
8390     int vector_len = 0;
8391     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8392   %}
8393   ins_pipe( pipe_slow );
8394 %}
8395 
8396 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
8397   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8398   match(Set dst (RShiftVI src shift));
8399   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
8400   ins_encode %{
8401     int vector_len = 0;
8402     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8403   %}
8404   ins_pipe( pipe_slow );
8405 %}
8406 
8407 instruct vsra4I(vecX dst, vecS shift) %{
8408   predicate(n->as_Vector()->length() == 4);
8409   match(Set dst (RShiftVI dst shift));
8410   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
8411   ins_encode %{
8412     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
8413   %}
8414   ins_pipe( pipe_slow );
8415 %}
8416 
8417 instruct vsra4I_imm(vecX dst, immI8 shift) %{
8418   predicate(n->as_Vector()->length() == 4);
8419   match(Set dst (RShiftVI dst shift));
8420   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
8421   ins_encode %{
8422     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
8423   %}
8424   ins_pipe( pipe_slow );
8425 %}
8426 
8427 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
8428   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8429   match(Set dst (RShiftVI src shift));
8430   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
8431   ins_encode %{
8432     int vector_len = 0;
8433     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8434   %}
8435   ins_pipe( pipe_slow );
8436 %}
8437 
8438 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
8439   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8440   match(Set dst (RShiftVI src shift));
8441   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
8442   ins_encode %{
8443     int vector_len = 0;
8444     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8445   %}
8446   ins_pipe( pipe_slow );
8447 %}
8448 
8449 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
8450   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8451   match(Set dst (RShiftVI src shift));
8452   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
8453   ins_encode %{
8454     int vector_len = 1;
8455     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8456   %}
8457   ins_pipe( pipe_slow );
8458 %}
8459 
8460 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
8461   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8462   match(Set dst (RShiftVI src shift));
8463   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
8464   ins_encode %{
8465     int vector_len = 1;
8466     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8467   %}
8468   ins_pipe( pipe_slow );
8469 %}
8470 
8471 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
8472   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8473   match(Set dst (RShiftVI src shift));
8474   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
8475   ins_encode %{
8476     int vector_len = 2;
8477     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8478   %}
8479   ins_pipe( pipe_slow );
8480 %}
8481 
8482 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8483   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8484   match(Set dst (RShiftVI src shift));
8485   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
8486   ins_encode %{
8487     int vector_len = 2;
8488     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8489   %}
8490   ins_pipe( pipe_slow );
8491 %}
8492 
8493 // There are no vector arithmetic right shift instructions for longs.
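     //
     // If a 64-bit arithmetic right shift ever had to be synthesized from the
     // logical shifts that do exist, the usual bias trick applies (a hedged
     // Java sketch, not code used by this file):
     //
     //   static long sra(long x, int k) {             // 0 <= k <= 63
     //     long sign = 1L << 63;
     //     return ((x ^ sign) >>> k) - (sign >>> k);  // logical shifts only
     //   }
     //
     // i.e. flip the sign bit so it behaves like a data bit, shift logically,
     // then subtract the shifted bias to restore the sign extension.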
8494 
8495 
8496 // --------------------------------- AND --------------------------------------
8497 
8498 instruct vand4B(vecS dst, vecS src) %{
8499   predicate(n->as_Vector()->length_in_bytes() == 4);
8500   match(Set dst (AndV dst src));
8501   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
8502   ins_encode %{
8503     __ pand($dst$$XMMRegister, $src$$XMMRegister);
8504   %}
8505   ins_pipe( pipe_slow );
8506 %}
8507 
8508 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
8509   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8510   match(Set dst (AndV src1 src2));
8511   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
8512   ins_encode %{
8513     int vector_len = 0;
8514     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8515   %}
8516   ins_pipe( pipe_slow );
8517 %}
8518 
8519 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
8520   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8521   match(Set dst (AndV src (LoadVector mem)));
8522   format %{ "vpand   $dst,$src,$mem\t! and vectors (4 bytes)" %}
8523   ins_encode %{
8524     int vector_len = 0;
8525     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8526   %}
8527   ins_pipe( pipe_slow );
8528 %}
8529 
8530 instruct vand8B(vecD dst, vecD src) %{
8531   predicate(n->as_Vector()->length_in_bytes() == 8);
8532   match(Set dst (AndV dst src));
8533   format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
8534   ins_encode %{
8535     __ pand($dst$$XMMRegister, $src$$XMMRegister);
8536   %}
8537   ins_pipe( pipe_slow );
8538 %}
8539 
8540 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
8541   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8542   match(Set dst (AndV src1 src2));
8543   format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
8544   ins_encode %{
8545     int vector_len = 0;
8546     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8547   %}
8548   ins_pipe( pipe_slow );
8549 %}
8550 
8551 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
8552   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8553   match(Set dst (AndV src (LoadVector mem)));
8554   format %{ "vpand   $dst,$src,$mem\t! and vectors (8 bytes)" %}
8555   ins_encode %{
8556     int vector_len = 0;
8557     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8558   %}
8559   ins_pipe( pipe_slow );
8560 %}
8561 
8562 instruct vand16B(vecX dst, vecX src) %{
8563   predicate(n->as_Vector()->length_in_bytes() == 16);
8564   match(Set dst (AndV dst src));
8565   format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
8566   ins_encode %{
8567     __ pand($dst$$XMMRegister, $src$$XMMRegister);
8568   %}
8569   ins_pipe( pipe_slow );
8570 %}
8571 
8572 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
8573   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8574   match(Set dst (AndV src1 src2));
8575   format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
8576   ins_encode %{
8577     int vector_len = 0;
8578     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8579   %}
8580   ins_pipe( pipe_slow );
8581 %}
8582 
8583 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
8584   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8585   match(Set dst (AndV src (LoadVector mem)));
8586   format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
8587   ins_encode %{
8588     int vector_len = 0;
8589     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8590   %}
8591   ins_pipe( pipe_slow );
8592 %}
8593 
8594 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
8595   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8596   match(Set dst (AndV src1 src2));
8597   format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
8598   ins_encode %{
8599     int vector_len = 1;
8600     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8601   %}
8602   ins_pipe( pipe_slow );
8603 %}
8604 
8605 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
8606   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8607   match(Set dst (AndV src (LoadVector mem)));
8608   format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
8609   ins_encode %{
8610     int vector_len = 1;
8611     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8612   %}
8613   ins_pipe( pipe_slow );
8614 %}
8615 
8616 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
8617   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8618   match(Set dst (AndV src1 src2));
8619   format %{ "vpand   $dst,$src1,$src2\t! and vectors (64 bytes)" %}
8620   ins_encode %{
8621     int vector_len = 2;
8622     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8623   %}
8624   ins_pipe( pipe_slow );
8625 %}
8626 
8627 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
8628   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8629   match(Set dst (AndV src (LoadVector mem)));
8630   format %{ "vpand   $dst,$src,$mem\t! and vectors (64 bytes)" %}
8631   ins_encode %{
8632     int vector_len = 2;
8633     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8634   %}
8635   ins_pipe( pipe_slow );
8636 %}
8637 
8638 // --------------------------------- OR ---------------------------------------
8639 
8640 instruct vor4B(vecS dst, vecS src) %{
8641   predicate(n->as_Vector()->length_in_bytes() == 4);
8642   match(Set dst (OrV dst src));
8643   format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
8644   ins_encode %{
8645     __ por($dst$$XMMRegister, $src$$XMMRegister);
8646   %}
8647   ins_pipe( pipe_slow );
8648 %}
8649 
8650 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
8651   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8652   match(Set dst (OrV src1 src2));
8653   format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
8654   ins_encode %{
8655     int vector_len = 0;
8656     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8657   %}
8658   ins_pipe( pipe_slow );
8659 %}
8660 
8661 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
8662   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8663   match(Set dst (OrV src (LoadVector mem)));
8664   format %{ "vpor    $dst,$src,$mem\t! or vectors (4 bytes)" %}
8665   ins_encode %{
8666     int vector_len = 0;
8667     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8668   %}
8669   ins_pipe( pipe_slow );
8670 %}
8671 
8672 instruct vor8B(vecD dst, vecD src) %{
8673   predicate(n->as_Vector()->length_in_bytes() == 8);
8674   match(Set dst (OrV dst src));
8675   format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
8676   ins_encode %{
8677     __ por($dst$$XMMRegister, $src$$XMMRegister);
8678   %}
8679   ins_pipe( pipe_slow );
8680 %}
8681 
8682 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
8683   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8684   match(Set dst (OrV src1 src2));
8685   format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
8686   ins_encode %{
8687     int vector_len = 0;
8688     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8689   %}
8690   ins_pipe( pipe_slow );
8691 %}
8692 
8693 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
8694   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8695   match(Set dst (OrV src (LoadVector mem)));
8696   format %{ "vpor    $dst,$src,$mem\t! or vectors (8 bytes)" %}
8697   ins_encode %{
8698     int vector_len = 0;
8699     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8700   %}
8701   ins_pipe( pipe_slow );
8702 %}
8703 
8704 instruct vor16B(vecX dst, vecX src) %{
8705   predicate(n->as_Vector()->length_in_bytes() == 16);
8706   match(Set dst (OrV dst src));
8707   format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
8708   ins_encode %{
8709     __ por($dst$$XMMRegister, $src$$XMMRegister);
8710   %}
8711   ins_pipe( pipe_slow );
8712 %}
8713 
8714 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
8715   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8716   match(Set dst (OrV src1 src2));
8717   format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
8718   ins_encode %{
8719     int vector_len = 0;
8720     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8721   %}
8722   ins_pipe( pipe_slow );
8723 %}
8724 
8725 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
8726   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8727   match(Set dst (OrV src (LoadVector mem)));
8728   format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
8729   ins_encode %{
8730     int vector_len = 0;
8731     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8732   %}
8733   ins_pipe( pipe_slow );
8734 %}
8735 
8736 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
8737   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8738   match(Set dst (OrV src1 src2));
8739   format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
8740   ins_encode %{
8741     int vector_len = 1;
8742     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8743   %}
8744   ins_pipe( pipe_slow );
8745 %}
8746 
8747 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
8748   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8749   match(Set dst (OrV src (LoadVector mem)));
8750   format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
8751   ins_encode %{
8752     int vector_len = 1;
8753     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8754   %}
8755   ins_pipe( pipe_slow );
8756 %}
8757 
8758 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
8759   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8760   match(Set dst (OrV src1 src2));
8761   format %{ "vpor    $dst,$src1,$src2\t! or vectors (64 bytes)" %}
8762   ins_encode %{
8763     int vector_len = 2;
8764     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8765   %}
8766   ins_pipe( pipe_slow );
8767 %}
8768 
8769 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
8770   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8771   match(Set dst (OrV src (LoadVector mem)));
8772   format %{ "vpor    $dst,$src,$mem\t! or vectors (64 bytes)" %}
8773   ins_encode %{
8774     int vector_len = 2;
8775     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8776   %}
8777   ins_pipe( pipe_slow );
8778 %}
8779 
8780 // --------------------------------- XOR --------------------------------------
8781 
8782 instruct vxor4B(vecS dst, vecS src) %{
8783   predicate(n->as_Vector()->length_in_bytes() == 4);
8784   match(Set dst (XorV dst src));
8785   format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
8786   ins_encode %{
8787     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8788   %}
8789   ins_pipe( pipe_slow );
8790 %}
8791 
8792 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
8793   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8794   match(Set dst (XorV src1 src2));
8795   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
8796   ins_encode %{
8797     int vector_len = 0;
8798     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8799   %}
8800   ins_pipe( pipe_slow );
8801 %}
8802 
8803 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
8804   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8805   match(Set dst (XorV src (LoadVector mem)));
8806   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (4 bytes)" %}
8807   ins_encode %{
8808     int vector_len = 0;
8809     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8810   %}
8811   ins_pipe( pipe_slow );
8812 %}
8813 
8814 instruct vxor8B(vecD dst, vecD src) %{
8815   predicate(n->as_Vector()->length_in_bytes() == 8);
8816   match(Set dst (XorV dst src));
8817   format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
8818   ins_encode %{
8819     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8820   %}
8821   ins_pipe( pipe_slow );
8822 %}
8823 
8824 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
8825   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8826   match(Set dst (XorV src1 src2));
8827   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
8828   ins_encode %{
8829     int vector_len = 0;
8830     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8831   %}
8832   ins_pipe( pipe_slow );
8833 %}
8834 
8835 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
8836   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8837   match(Set dst (XorV src (LoadVector mem)));
8838   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (8 bytes)" %}
8839   ins_encode %{
8840     int vector_len = 0;
8841     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8842   %}
8843   ins_pipe( pipe_slow );
8844 %}
8845 
8846 instruct vxor16B(vecX dst, vecX src) %{
8847   predicate(n->as_Vector()->length_in_bytes() == 16);
8848   match(Set dst (XorV dst src));
8849   format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
8850   ins_encode %{
8851     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8852   %}
8853   ins_pipe( pipe_slow );
8854 %}
8855 
8856 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
8857   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8858   match(Set dst (XorV src1 src2));
8859   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
8860   ins_encode %{
8861     int vector_len = 0;
8862     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8863   %}
8864   ins_pipe( pipe_slow );
8865 %}
8866 
8867 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
8868   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8869   match(Set dst (XorV src (LoadVector mem)));
8870   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
8871   ins_encode %{
8872     int vector_len = 0;
8873     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8874   %}
8875   ins_pipe( pipe_slow );
8876 %}
8877 
8878 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
8879   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8880   match(Set dst (XorV src1 src2));
8881   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
8882   ins_encode %{
8883     int vector_len = 1;
8884     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8885   %}
8886   ins_pipe( pipe_slow );
8887 %}
8888 
8889 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
8890   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8891   match(Set dst (XorV src (LoadVector mem)));
8892   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
8893   ins_encode %{
8894     int vector_len = 1;
8895     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8896   %}
8897   ins_pipe( pipe_slow );
8898 %}
8899 
8900 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
8901   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8902   match(Set dst (XorV src1 src2));
8903   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
8904   ins_encode %{
8905     int vector_len = 2;
8906     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8907   %}
8908   ins_pipe( pipe_slow );
8909 %}
8910 
8911 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
8912   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8913   match(Set dst (XorV src (LoadVector mem)));
8914   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (64 bytes)" %}
8915   ins_encode %{
8916     int vector_len = 2;
8917     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8918   %}
8919   ins_pipe( pipe_slow );
8920 %}
8921