//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
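//
// For example, the first definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares the first 32-bit slot of xmm0 as Save-On-Call for both the
// allocator and the C calling convention, spilled as a float (Op_RegF),
// with encoding 0 and backed by xmm0's VMReg.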

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters
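//
// Slot naming below: XMM0 is word (a) of xmm0, XMM0b is word (b), and so on
// through XMM0p; a float occupies XMM0 alone, a double occupies the pair
// XMM0/XMM0b, and the full 512-bit register spans XMM0 through XMM0p.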

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64
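
// Note: RFLAGS has no backing VMReg (VMRegImpl::Bad()); it is defined only so
// the allocator can model the condition codes (see the int_flags class below).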

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15,
                    XMM16,
                    XMM17,
                    XMM18,
                    XMM19,
                    XMM20,
                    XMM21,
                    XMM22,
                    XMM23,
                    XMM24,
                    XMM25,
                    XMM26,
                    XMM27,
                    XMM28,
                    XMM29,
                    XMM30,
                    XMM31
#endif
                    );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
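// Each reg_class_dynamic selects one of its two member classes based on the
// trailing predicate: the EVEX variant when the predicate holds, otherwise
// the legacy variant.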

// Class for pre-EVEX double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for EVEX double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b,
                     XMM16, XMM16b,
                     XMM17, XMM17b,
                     XMM18, XMM18b,
                     XMM19, XMM19b,
                     XMM20, XMM20b,
                     XMM21, XMM21b,
                     XMM22, XMM22b,
                     XMM23, XMM23b,
                     XMM24, XMM24b,
                     XMM25, XMM25b,
                     XMM26, XMM26b,
                     XMM27, XMM27b,
                     XMM28, XMM28b,
                     XMM29, XMM29b,
                     XMM30, XMM30b,
                     XMM31, XMM31b
#endif
                     );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre-EVEX 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for EVEX 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15,
                      XMM16,
                      XMM17,
                      XMM18,
                      XMM19,
                      XMM20,
                      XMM21,
                      XMM22,
                      XMM23,
                      XMM24,
                      XMM25,
                      XMM26,
                      XMM27,
                      XMM28,
                      XMM29,
                      XMM30,
                      XMM31
#endif
                      );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
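// The *_vlbwdq variants are gated on the AVX-512 VL, BW and DQ extensions
// (VM_Version::supports_avx512vlbwdq()).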

// Class for pre-EVEX 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for EVEX 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b,
                      XMM16, XMM16b,
                      XMM17, XMM17b,
                      XMM18, XMM18b,
                      XMM19, XMM19b,
                      XMM20, XMM20b,
                      XMM21, XMM21b,
                      XMM22, XMM22b,
                      XMM23, XMM23b,
                      XMM24, XMM24b,
                      XMM25, XMM25b,
                      XMM26, XMM26b,
                      XMM27, XMM27b,
                      XMM28, XMM28b,
                      XMM29, XMM29b,
                      XMM30, XMM30b,
                      XMM31, XMM31b
#endif
                      );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for EVEX 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d,
                      XMM16, XMM16b, XMM16c, XMM16d,
                      XMM17, XMM17b, XMM17c, XMM17d,
                      XMM18, XMM18b, XMM18c, XMM18d,
                      XMM19, XMM19b, XMM19c, XMM19d,
                      XMM20, XMM20b, XMM20c, XMM20d,
                      XMM21, XMM21b, XMM21c, XMM21d,
                      XMM22, XMM22b, XMM22c, XMM22d,
                      XMM23, XMM23b, XMM23c, XMM23d,
                      XMM24, XMM24b, XMM24c, XMM24d,
                      XMM25, XMM25b, XMM25c, XMM25d,
                      XMM26, XMM26b, XMM26c, XMM26d,
                      XMM27, XMM27b, XMM27c, XMM27d,
                      XMM28, XMM28b, XMM28c, XMM28d,
                      XMM29, XMM29b, XMM29c, XMM29d,
                      XMM30, XMM30b, XMM30c, XMM30d,
                      XMM31, XMM31b, XMM31c, XMM31d
#endif
                      );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

 978 // Class for all 256bit vector registers
 979 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 980                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 981                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 982                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 983                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 984                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 985                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 986                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
 987 #ifdef _LP64
 988                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 989                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 990                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 991                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 992                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 993                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 994                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 995                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
 996 #endif
 997                       );
 998 
 999 // Class for all 256bit vector registers
1000 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1001                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1002                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1003                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1004                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1005                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1006                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1007                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1008 #ifdef _LP64
1009                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1010                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1011                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1012                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1013                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1014                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1015                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1016                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1017                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1018                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1019                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1020                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1021                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1022                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1023                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1024                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1025                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1026                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1027                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1028                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1029                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1030                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1031                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1032                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
1033 #endif
1034                       );
1035 
1036 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1037 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1038 
1039 // Class for all 512bit vector registers
1040 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1041                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1042                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1043                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1044                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1045                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1046                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1047                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1048 #ifdef _LP64
1049                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1050                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1051                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1052                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1053                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1054                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1055                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1056                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1057                      ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1058                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1059                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1060                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1061                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1062                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1063                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1064                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1065                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1066                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1067                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1068                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1069                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1070                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1071                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1072                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1073 #endif
1074                       );
1075 
1076 // Class for restricted 512bit vector registers
1077 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1078                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1079                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1080                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1081                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1082                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1083                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1084                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1085 #ifdef _LP64
1086                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1087                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1088                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1089                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1090                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1091                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1092                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1093                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1094 #endif
1095                       );
1096 
1097 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1099 
1100 %}
1101 
1102 
1103 //----------SOURCE BLOCK-------------------------------------------------------
1104 // This is a block of C++ code which provides values, functions, and
1105 // definitions necessary in the rest of the architecture description
1106 
1107 source_hpp %{
1108 // Header information of the source block.
1109 // Method declarations/definitions which are used outside
1110 // the ad-scope can conveniently be defined here.
1111 //
1112 // To keep related declarations/definitions/uses close together,
1113 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
1114 
1115 class NativeJump;
1116 
1117 class CallStubImpl {
1118 
1119   //--------------------------------------------------------------
1120   //---<  Used for optimization in Compile::shorten_branches  >---
1121   //--------------------------------------------------------------
1122 
1123  public:
1124   // Size of call trampoline stub.
1125   static uint size_call_trampoline() {
1126     return 0; // no call trampolines on this platform
1127   }
1128 
1129   // number of relocations needed by a call trampoline stub
1130   static uint reloc_call_trampoline() {
1131     return 0; // no call trampolines on this platform
1132   }
1133 };
1134 
1135 class HandlerImpl {
1136 
1137  public:
1138 
1139   static int emit_exception_handler(CodeBuffer &cbuf);
1140   static int emit_deopt_handler(CodeBuffer& cbuf);
1141 
1142   static uint size_exception_handler() {
1143     // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1146     // Note that this value is also credited (in output.cpp) to
1147     // the size of the code section.
1148     return NativeJump::instruction_size;
1149   }
1150 
1151 #ifdef _LP64
1152   static uint size_deopt_handler() {
1153     // three 5 byte instructions plus one move for unreachable address.
1154     return 15+3;
1155   }
1156 #else
1157   static uint size_deopt_handler() {
1158     // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1161     // Note that this value is also credited (in output.cpp) to
1162     // the size of the code section.
1163     return 5 + NativeJump::instruction_size; // pushl(); jmp;
1164   }
1165 #endif
1166 };
1167 
1168 class Node::PD {
1169 public:
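  // Platform-dependent node flags. Flag_intel_jcc_erratum marks branch nodes that
  // may need padding to mitigate the Intel JCC erratum (see MachNode::compute_padding()).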
1170   enum NodeFlags {
1171     Flag_intel_jcc_erratum = Node::_last_flag << 1,
1172     _last_flag             = Flag_intel_jcc_erratum
1173   };
1174 };
1175 
1176 %} // end source_hpp
1177 
1178 source %{
1179 
1180 #include "opto/addnode.hpp"
1181 #include "c2_intelJccErratum_x86.hpp"
1182 
1183 void PhaseOutput::pd_perform_mach_node_analysis() {
1184   if (VM_Version::has_intel_jcc_erratum()) {
1185     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
1186     _buf_sizes._code += extra_padding;
1187   }
1188 }
1189 
1190 int MachNode::pd_alignment_required() const {
1191   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
1192     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
1193     return IntelJccErratum::largest_jcc_size() + 1;
1194   } else {
1195     return 1;
1196   }
1197 }
1198 
1199 int MachNode::compute_padding(int current_offset) const {
1200   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
1201     Compile* C = Compile::current();
1202     PhaseOutput* output = C->output();
1203     Block* block = output->block();
1204     int index = output->index();
1205     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
1206   } else {
1207     return 0;
1208   }
1209 }
1210 
1211 // Emit exception handler code.
1212 // Stuff framesize into a register and call a VM stub routine.
1213 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1214 
1215   // Note that the code buffer's insts_mark is always relative to insts.
1216   // That's why we must use the macroassembler to generate a handler.
1217   C2_MacroAssembler _masm(&cbuf);
1218   address base = __ start_a_stub(size_exception_handler());
1219   if (base == NULL) {
1220     ciEnv::current()->record_failure("CodeCache is full");
1221     return 0;  // CodeBuffer::expand failed
1222   }
1223   int offset = __ offset();
1224   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1225   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1226   __ end_a_stub();
1227   return offset;
1228 }
1229 
1230 // Emit deopt handler code.
1231 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1232 
1233   // Note that the code buffer's insts_mark is always relative to insts.
1234   // That's why we must use the macroassembler to generate a handler.
1235   C2_MacroAssembler _masm(&cbuf);
1236   address base = __ start_a_stub(size_deopt_handler());
1237   if (base == NULL) {
1238     ciEnv::current()->record_failure("CodeCache is full");
1239     return 0;  // CodeBuffer::expand failed
1240   }
1241   int offset = __ offset();
1242 
1243 #ifdef _LP64
1244   address the_pc = (address) __ pc();
1245   Label next;
  // Push the value of "the_pc" on the stack without destroying any registers,
  // as they may all be live.
1248 
1249   // push address of "next"
1250   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1251   __ bind(next);
1252   // adjust it so it matches "the_pc"
1253   __ subptr(Address(rsp, 0), __ offset() - offset);
1254 #else
1255   InternalAddress here(__ pc());
1256   __ pushptr(here.addr());
1257 #endif
1258 
1259   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1260   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
1261   __ end_a_stub();
1262   return offset;
1263 }
1264 
1265 
1266 //=============================================================================
1267 
1268   // Float masks come from different places depending on platform.
1269 #ifdef _LP64
1270   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
1271   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
1272   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1273   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1274 #else
1275   static address float_signmask()  { return (address)float_signmask_pool; }
1276   static address float_signflip()  { return (address)float_signflip_pool; }
1277   static address double_signmask() { return (address)double_signmask_pool; }
1278   static address double_signflip() { return (address)double_signflip_pool; }
1279 #endif
1280   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
1281   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
1282   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
1283 
1284 //=============================================================================
1285 const bool Matcher::match_rule_supported(int opcode) {
1286   if (!has_match_rule(opcode)) {
1287     return false; // no match rule present
1288   }
1289   switch (opcode) {
1290     case Op_AbsVL:
1291       if (UseAVX < 3) {
1292         return false;
1293       }
1294       break;
1295     case Op_PopCountI:
1296     case Op_PopCountL:
1297       if (!UsePopCountInstruction) {
1298         return false;
1299       }
1300       break;
1301     case Op_PopCountVI:
1302       if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) {
1303         return false;
1304       }
1305       break;
1306     case Op_MulVI:
1307       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
1308         return false;
1309       }
1310       break;
1311     case Op_MulVL:
1312     case Op_MulReductionVL:
1313       if (VM_Version::supports_avx512dq() == false) {
1314         return false;
1315       }
1316       break;
1317     case Op_AbsVB:
1318     case Op_AbsVS:
1319     case Op_AbsVI:
1320     case Op_AddReductionVI:
1321     case Op_AndReductionV:
1322     case Op_OrReductionV:
1323     case Op_XorReductionV:
1324       if (UseSSE < 3) { // requires at least SSSE3
1325         return false;
1326       }
1327       break;
1328     case Op_MulReductionVI:
1329       if (UseSSE < 4) { // requires at least SSE4
1330         return false;
1331       }
1332       break;
1333     case Op_SqrtVD:
1334     case Op_SqrtVF:
1335       if (UseAVX < 1) { // enabled for AVX only
1336         return false;
1337       }
1338       break;
1339     case Op_CompareAndSwapL:
1340 #ifdef _LP64
1341     case Op_CompareAndSwapP:
1342 #endif
1343       if (!VM_Version::supports_cx8()) {
1344         return false;
1345       }
1346       break;
1347     case Op_CMoveVF:
1348     case Op_CMoveVD:
1349       if (UseAVX < 1 || UseAVX > 2) {
1350         return false;
1351       }
1352       break;
1353     case Op_StrIndexOf:
1354       if (!UseSSE42Intrinsics) {
1355         return false;
1356       }
1357       break;
1358     case Op_StrIndexOfChar:
1359       if (!UseSSE42Intrinsics) {
1360         return false;
1361       }
1362       break;
1363     case Op_OnSpinWait:
1364       if (VM_Version::supports_on_spin_wait() == false) {
1365         return false;
1366       }
1367       break;
1368     case Op_MulVB:
1369     case Op_LShiftVB:
1370     case Op_RShiftVB:
1371     case Op_URShiftVB:
1372       if (UseSSE < 4) {
1373         return false;
1374       }
1375       break;
1376 #ifdef _LP64
1377     case Op_MaxD:
1378     case Op_MaxF:
1379     case Op_MinD:
1380     case Op_MinF:
1381       if (UseAVX < 1) { // enabled for AVX only
1382         return false;
1383       }
1384       break;
1385 #endif
1386     case Op_CacheWB:
1387     case Op_CacheWBPreSync:
1388     case Op_CacheWBPostSync:
1389       if (!VM_Version::supports_data_cache_line_flush()) {
1390         return false;
1391       }
1392       break;
1393     case Op_RoundDoubleMode:
1394       if (UseSSE < 4) {
1395         return false;
1396       }
1397       break;
1398     case Op_RoundDoubleModeV:
1399       if (VM_Version::supports_avx() == false) {
1400         return false; // 128bit vroundpd is not available
1401       }
1402       break;
1403     case Op_MacroLogicV:
1404       if (UseAVX < 3 || !UseVectorMacroLogic) {
1405         return false;
1406       }
1407       break;
1408 #ifndef _LP64
1409     case Op_AddReductionVF:
1410     case Op_AddReductionVD:
1411     case Op_MulReductionVF:
1412     case Op_MulReductionVD:
1413       if (UseSSE < 1) { // requires at least SSE
1414         return false;
1415       }
1416       break;
1417     case Op_MulAddVS2VI:
1418     case Op_RShiftVL:
1419     case Op_AbsVD:
1420     case Op_NegVD:
1421       if (UseSSE < 2) {
1422         return false;
1423       }
1424       break;
1425 #endif // !LP64
1426   }
1427   return true;  // Match rules are supported by default.
1428 }
1429 
1430 //------------------------------------------------------------------------
1431 
// Identify extra cases for which we might want to provide match rules for vector nodes
// and other intrinsics, guarded by vector length (vlen) and element type (bt).
1434 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
1435   if (!match_rule_supported(opcode)) {
1436     return false;
1437   }
1438   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
1439   //   * SSE2 supports 128bit vectors for all types;
1440   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
1441   //   * AVX2 supports 256bit vectors for all types;
1442   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
1443   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
1444   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
1445   // And MaxVectorSize is taken into account as well.
1446   if (!vector_size_supported(bt, vlen)) {
1447     return false;
1448   }
1449   // Special cases which require vector length follow:
1450   //   * implementation limitations
1451   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
1452   //   * 128bit vroundpd instruction is present only in AVX1
1453   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
1454   switch (opcode) {
1455     case Op_AbsVF:
1456     case Op_NegVF:
1457       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
1458         return false; // 512bit vandps and vxorps are not available
1459       }
1460       break;
1461     case Op_AbsVD:
1462     case Op_NegVD:
1463       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
1464         return false; // 512bit vandpd and vxorpd are not available
1465       }
1466       break;
1467     case Op_CMoveVF:
1468       if (vlen != 8) {
1469         return false; // implementation limitation (only vcmov8F_reg is present)
1470       }
1471       break;
1472     case Op_MacroLogicV:
1473       if (!VM_Version::supports_evex() ||
1474           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
1475         return false;
1476       }
1477       break;
1478     case Op_CMoveVD:
1479       if (vlen != 4) {
1480         return false; // implementation limitation (only vcmov4D_reg is present)
1481       }
1482       break;
1483   }
  return true;  // Match rules are supported by default.
1485 }
1486 
1487 // x86 supports generic vector operands: vec and legVec.
1488 const bool Matcher::supports_generic_vector_operands = true;
1489 
1490 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
1491   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
1492   bool legacy = (generic_opnd->opcode() == LEGVEC);
1493   if (!VM_Version::supports_avx512vlbwdq() && // KNL
1494       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
1495     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
1496     return new legVecZOper();
1497   }
1498   if (legacy) {
1499     switch (ideal_reg) {
1500       case Op_VecS: return new legVecSOper();
1501       case Op_VecD: return new legVecDOper();
1502       case Op_VecX: return new legVecXOper();
1503       case Op_VecY: return new legVecYOper();
1504       case Op_VecZ: return new legVecZOper();
1505     }
1506   } else {
1507     switch (ideal_reg) {
1508       case Op_VecS: return new vecSOper();
1509       case Op_VecD: return new vecDOper();
1510       case Op_VecX: return new vecXOper();
1511       case Op_VecY: return new vecYOper();
1512       case Op_VecZ: return new vecZOper();
1513     }
1514   }
1515   ShouldNotReachHere();
1516   return NULL;
1517 }
1518 
1519 bool Matcher::is_generic_reg2reg_move(MachNode* m) {
1520   switch (m->rule()) {
1521     case MoveVec2Leg_rule:
1522     case MoveLeg2Vec_rule:
1523       return true;
1524     default:
1525       return false;
1526   }
1527 }
1528 
1529 bool Matcher::is_generic_vector(MachOper* opnd) {
1530   switch (opnd->opcode()) {
1531     case VEC:
1532     case LEGVEC:
1533       return true;
1534     default:
1535       return false;
1536   }
1537 }
1538 
1539 //------------------------------------------------------------------------
1540 
1541 const bool Matcher::has_predicated_vectors(void) {
  // Predicated (masked) vector operations require AVX-512 with the VL extension.
  return (UseAVX > 2) && VM_Version::supports_avx512vl();
1548 }
1549 
1550 const int Matcher::float_pressure(int default_pressure_threshold) {
1551   int float_pressure_threshold = default_pressure_threshold;
1552 #ifdef _LP64
1553   if (UseAVX > 2) {
1554     // Increase pressure threshold on machines with AVX3 which have
1555     // 2x more XMM registers.
1556     float_pressure_threshold = default_pressure_threshold * 2;
1557   }
1558 #endif
1559   return float_pressure_threshold;
1560 }
1561 
1562 // Max vector size in bytes. 0 if not supported.
1563 const int Matcher::vector_width_in_bytes(BasicType bt) {
1564   assert(is_java_primitive(bt), "only primitive type vectors");
1565   if (UseSSE < 2) return 0;
1566   // SSE2 supports 128bit vectors for all types.
1567   // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
1569   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
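  // e.g. UseAVX == 2 gives (1 << 2) * 8 = 32 bytes, UseAVX == 3 gives 64 bytes.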
1570   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1571   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1572     size = (UseAVX > 2) ? 64 : 32;
1573   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
1574     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
1575   // Use flag to limit vector size.
1576   size = MIN2(size,(int)MaxVectorSize);
1577   // Minimum 2 values in vector (or 4 for bytes).
1578   switch (bt) {
1579   case T_DOUBLE:
1580   case T_LONG:
1581     if (size < 16) return 0;
1582     break;
1583   case T_FLOAT:
1584   case T_INT:
1585     if (size < 8) return 0;
1586     break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
1599   default:
1600     ShouldNotReachHere();
1601   }
1602   return size;
1603 }
1604 
1605 // Limits on vector size (number of elements) loaded into vector.
1606 const int Matcher::max_vector_size(const BasicType bt) {
1607   return vector_width_in_bytes(bt)/type2aelembytes(bt);
1608 }
1609 const int Matcher::min_vector_size(const BasicType bt) {
1610   int max_size = max_vector_size(bt);
1611   // Min size which can be loaded into vector is 4 bytes.
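  // For example, the minimum is 4 elements for T_BYTE and 2 elements for T_INT,
  // capped by max_vector_size(bt).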
1612   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
1613   return MIN2(size,max_size);
1614 }
1615 
1616 // Vector ideal reg corresponding to specified size in bytes
1617 const uint Matcher::vector_ideal_reg(int size) {
1618   assert(MaxVectorSize >= size, "");
1619   switch(size) {
1620     case  4: return Op_VecS;
1621     case  8: return Op_VecD;
1622     case 16: return Op_VecX;
1623     case 32: return Op_VecY;
1624     case 64: return Op_VecZ;
1625   }
1626   ShouldNotReachHere();
1627   return 0;
1628 }
1629 
1630 // x86 supports misaligned vectors store/load.
1631 const bool Matcher::misaligned_vectors_ok() {
1632   return true;
1633 }
1634 
1635 // x86 AES instructions are compatible with SunJCE expanded
1636 // keys, hence we do not need to pass the original key to stubs
1637 const bool Matcher::pass_original_key_for_aes() {
1638   return false;
1639 }
1640 
1641 
1642 const bool Matcher::convi2l_type_required = true;
1643 
1644 // Check for shift by small constant as well
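// (shift counts 0-3 correspond to the x86 addressing-mode scale factors 1, 2, 4 and 8)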
1645 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
1646   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
1647       shift->in(2)->get_int() <= 3 &&
1648       // Are there other uses besides address expressions?
1649       !matcher->is_visited(shift)) {
1650     address_visited.set(shift->_idx); // Flag as address_visited
1651     mstack.push(shift->in(2), Matcher::Visit);
1652     Node *conv = shift->in(1);
1653 #ifdef _LP64
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
1657     if (conv->Opcode() == Op_ConvI2L &&
1658         conv->as_Type()->type()->is_long()->_lo >= 0 &&
1659         // Are there other uses besides address expressions?
1660         !matcher->is_visited(conv)) {
1661       address_visited.set(conv->_idx); // Flag as address_visited
1662       mstack.push(conv->in(1), Matcher::Pre_Visit);
1663     } else
1664 #endif
1665       mstack.push(conv, Matcher::Pre_Visit);
1666     return true;
1667   }
1668   return false;
1669 }
1670 
1671 // This function identifies sub-graphs in which a 'load' node is
1672 // input to two different nodes, and such that it can be matched
1673 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
1675 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
1676 // refers to the same node.
1677 //
1678 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
1679 // This is a temporary solution until we make DAGs expressible in ADL.
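// For the blsi example above, op1 is AndL, op2 is SubL, ConType is TypeLong and
// con_value is 0, i.e. (AndL (SubL 0 LoadL*) LoadL*); see is_bmi_pattern() below.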
1680 template<typename ConType>
1681 class FusedPatternMatcher {
1682   Node* _op1_node;
1683   Node* _mop_node;
1684   int _con_op;
1685 
1686   static int match_next(Node* n, int next_op, int next_op_idx) {
1687     if (n->in(1) == NULL || n->in(2) == NULL) {
1688       return -1;
1689     }
1690 
1691     if (next_op_idx == -1) { // n is commutative, try rotations
1692       if (n->in(1)->Opcode() == next_op) {
1693         return 1;
1694       } else if (n->in(2)->Opcode() == next_op) {
1695         return 2;
1696       }
1697     } else {
1698       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
1699       if (n->in(next_op_idx)->Opcode() == next_op) {
1700         return next_op_idx;
1701       }
1702     }
1703     return -1;
1704   }
1705 
1706  public:
1707   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
1708     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
1709 
1710   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
1711              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
1712              typename ConType::NativeType con_value) {
1713     if (_op1_node->Opcode() != op1) {
1714       return false;
1715     }
1716     if (_mop_node->outcnt() > 2) {
1717       return false;
1718     }
1719     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
1720     if (op1_op2_idx == -1) {
1721       return false;
1722     }
1723     // Memory operation must be the other edge
1724     int op1_mop_idx = (op1_op2_idx & 1) + 1;
1725 
1726     // Check that the mop node is really what we want
1727     if (_op1_node->in(op1_mop_idx) == _mop_node) {
1728       Node* op2_node = _op1_node->in(op1_op2_idx);
1729       if (op2_node->outcnt() > 1) {
1730         return false;
1731       }
1732       assert(op2_node->Opcode() == op2, "Should be");
1733       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
1734       if (op2_con_idx == -1) {
1735         return false;
1736       }
1737       // Memory operation must be the other edge
1738       int op2_mop_idx = (op2_con_idx & 1) + 1;
1739       // Check that the memory operation is the same node
1740       if (op2_node->in(op2_mop_idx) == _mop_node) {
1741         // Now check the constant
1742         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
1743         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
1744           return true;
1745         }
1746       }
1747     }
1748     return false;
1749   }
1750 };
1751 
1752 static bool is_bmi_pattern(Node* n, Node* m) {
1753   assert(UseBMI1Instructions, "sanity");
1754   if (n != NULL && m != NULL) {
1755     if (m->Opcode() == Op_LoadI) {
1756       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
1757       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
1758              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
1759              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
1760     } else if (m->Opcode() == Op_LoadL) {
1761       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
1762       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
1763              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
1764              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
1765     }
1766   }
1767   return false;
1768 }
1769 
1770 // Should the matcher clone input 'm' of node 'n'?
1771 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
1772   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
1773   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
1774     mstack.push(m, Visit);
1775     return true;
1776   }
1777   return false;
1778 }
1779 
1780 // Should the Matcher clone shifts on addressing modes, expecting them
1781 // to be subsumed into complex addressing expressions or compute them
1782 // into registers?
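// For example, in (AddP base (AddP base (LShiftX index con)) offset) both the shift
// and the constant offset can be folded into a single [base + index*scale + disp]
// addressing mode on x86.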
1783 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
1784   Node *off = m->in(AddPNode::Offset);
1785   if (off->is_Con()) {
1786     address_visited.test_set(m->_idx); // Flag as address_visited
1787     Node *adr = m->in(AddPNode::Address);
1788 
1789     // Intel can handle 2 adds in addressing mode
1790     // AtomicAdd is not an addressing expression.
1791     // Cheap to find it by looking for screwy base.
1792     if (adr->is_AddP() &&
1793         !adr->in(AddPNode::Base)->is_top() &&
1794         LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
1795         // Are there other uses besides address expressions?
1796         !is_visited(adr)) {
1797       address_visited.set(adr->_idx); // Flag as address_visited
1798       Node *shift = adr->in(AddPNode::Offset);
1799       if (!clone_shift(shift, this, mstack, address_visited)) {
1800         mstack.push(shift, Pre_Visit);
1801       }
1802       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
1803       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
1804     } else {
1805       mstack.push(adr, Pre_Visit);
1806     }
1807 
1808     // Clone X+offset as it also folds into most addressing expressions
1809     mstack.push(off, Visit);
1810     mstack.push(m->in(AddPNode::Base), Pre_Visit);
1811     return true;
1812   } else if (clone_shift(off, this, mstack, address_visited)) {
1813     address_visited.test_set(m->_idx); // Flag as address_visited
1814     mstack.push(m->in(AddPNode::Address), Pre_Visit);
1815     mstack.push(m->in(AddPNode::Base), Pre_Visit);
1816     return true;
1817   }
1818   return false;
1819 }
1820 
1821 void Compile::reshape_address(AddPNode* addp) {
1822 }
1823 
1824 static inline uint vector_length(const MachNode* n) {
1825   const TypeVect* vt = n->bottom_type()->is_vect();
1826   return vt->length();
1827 }
1828 
1829 static inline uint vector_length(const MachNode* use, MachOper* opnd) {
1830   uint def_idx = use->operand_index(opnd);
1831   Node* def = use->in(def_idx);
1832   return def->bottom_type()->is_vect()->length();
1833 }
1834 
1835 static inline uint vector_length_in_bytes(const MachNode* n) {
1836   const TypeVect* vt = n->bottom_type()->is_vect();
1837   return vt->length_in_bytes();
1838 }
1839 
1840 static inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) {
1841   uint def_idx = use->operand_index(opnd);
1842   Node* def = use->in(def_idx);
1843   return def->bottom_type()->is_vect()->length_in_bytes();
1844 }
1845 
1846 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* n) {
1847   switch(vector_length_in_bytes(n)) {
1848     case  4: // fall-through
1849     case  8: // fall-through
1850     case 16: return Assembler::AVX_128bit;
1851     case 32: return Assembler::AVX_256bit;
1852     case 64: return Assembler::AVX_512bit;
1853 
1854     default: {
1855       ShouldNotReachHere();
1856       return Assembler::AVX_NoVec;
1857     }
1858   }
1859 }
1860 
1861 // Helper methods for MachSpillCopyNode::implementation().
1862 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1863                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // determined by emitting the instructions into a scratch buffer.
1866   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1867   assert(ireg == Op_VecS || // 32bit vector
1868          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1869          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1870          "no non-adjacent vector moves" );
1871   if (cbuf) {
1872     C2_MacroAssembler _masm(cbuf);
1873     int offset = __ offset();
1874     switch (ireg) {
1875     case Op_VecS: // copy whole register
1876     case Op_VecD:
1877     case Op_VecX:
1878 #ifndef _LP64
1879       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1880 #else
1881       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1882         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1883       } else {
1884         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
1886 #endif
1887       break;
1888     case Op_VecY:
1889 #ifndef _LP64
1890       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1891 #else
1892       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1893         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1894       } else {
1895         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
1897 #endif
1898       break;
1899     case Op_VecZ:
1900       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1901       break;
1902     default:
1903       ShouldNotReachHere();
1904     }
1905     int size = __ offset() - offset;
1906 #ifdef ASSERT
1907     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
1909 #endif
1910     return size;
1911 #ifndef PRODUCT
1912   } else if (!do_size) {
1913     switch (ireg) {
1914     case Op_VecS:
1915     case Op_VecD:
1916     case Op_VecX:
1917       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1918       break;
1919     case Op_VecY:
1920     case Op_VecZ:
1921       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1922       break;
1923     default:
1924       ShouldNotReachHere();
1925     }
1926 #endif
1927   }
1928   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
1929   return (UseAVX > 2) ? 6 : 4;
1930 }
1931 
1932 int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1933                      int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // determined by emitting the instructions into a scratch buffer.
1936   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1937   if (cbuf) {
1938     C2_MacroAssembler _masm(cbuf);
1939     int offset = __ offset();
1940     if (is_load) {
1941       switch (ireg) {
1942       case Op_VecS:
1943         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1944         break;
1945       case Op_VecD:
1946         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1947         break;
1948       case Op_VecX:
1949 #ifndef _LP64
1950         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1951 #else
1952         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1953           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1954         } else {
1955           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1956           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
1957         }
1958 #endif
1959         break;
1960       case Op_VecY:
1961 #ifndef _LP64
1962         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1963 #else
1964         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1965           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1966         } else {
1967           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1968           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
1969         }
1970 #endif
1971         break;
1972       case Op_VecZ:
1973         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1974         break;
1975       default:
1976         ShouldNotReachHere();
1977       }
1978     } else { // store
1979       switch (ireg) {
1980       case Op_VecS:
1981         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1982         break;
1983       case Op_VecD:
1984         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1985         break;
1986       case Op_VecX:
1987 #ifndef _LP64
1988         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1989 #else
1990         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1991           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
1994           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
1995         }
1996 #endif
1997         break;
1998       case Op_VecY:
1999 #ifndef _LP64
2000         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2001 #else
2002         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2003           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
2006           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
2007         }
2008 #endif
2009         break;
2010       case Op_VecZ:
2011         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2012         break;
2013       default:
2014         ShouldNotReachHere();
2015       }
2016     }
2017     int size = __ offset() - offset;
2018 #ifdef ASSERT
2019     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
2020     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
2022 #endif
2023     return size;
2024 #ifndef PRODUCT
2025   } else if (!do_size) {
2026     if (is_load) {
2027       switch (ireg) {
2028       case Op_VecS:
2029         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2030         break;
2031       case Op_VecD:
2032         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2033         break;
      case Op_VecX:
2035         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2036         break;
2037       case Op_VecY:
2038       case Op_VecZ:
2039         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2040         break;
2041       default:
2042         ShouldNotReachHere();
2043       }
2044     } else { // store
2045       switch (ireg) {
2046       case Op_VecS:
2047         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2048         break;
2049       case Op_VecD:
2050         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2051         break;
      case Op_VecX:
2053         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2054         break;
2055       case Op_VecY:
2056       case Op_VecZ:
2057         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2058         break;
2059       default:
2060         ShouldNotReachHere();
2061       }
2062     }
2063 #endif
2064   }
2065   bool is_single_byte = false;
2066   int vec_len = 0;
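  // With EVEX (UseAVX > 2) an 8-bit compressed displacement (disp8*N) may be used,
  // so whether the offset fits in a single byte depends on the tuple type and
  // input size queried below.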
2067   if ((UseAVX > 2) && (stack_offset != 0)) {
2068     int tuple_type = Assembler::EVEX_FVM;
2069     int input_size = Assembler::EVEX_32bit;
2070     switch (ireg) {
2071     case Op_VecS:
2072       tuple_type = Assembler::EVEX_T1S;
2073       break;
2074     case Op_VecD:
2075       tuple_type = Assembler::EVEX_T1S;
2076       input_size = Assembler::EVEX_64bit;
2077       break;
2078     case Op_VecX:
2079       break;
2080     case Op_VecY:
2081       vec_len = 1;
2082       break;
2083     case Op_VecZ:
2084       vec_len = 2;
2085       break;
2086     }
2087     is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
2088   }
2089   int offset_size = 0;
2090   int size = 5;
2091   if (UseAVX > 2 ) {
2092     if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
2093       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
2094       size += 2; // Need an additional two bytes for EVEX encoding
2095     } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
2096       offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
2097     } else {
2098       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
2100     }
2101   } else {
2102     offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
2103   }
2104   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
2105   return size+offset_size;
2106 }
2107 
2108 static inline jint replicate4_imm(int con, int width) {
2109   // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
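  // For example, replicate4_imm(0x12, 1) returns 0x12121212 and
  // replicate4_imm(0x1234, 2) returns 0x12341234.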
2110   assert(width == 1 || width == 2, "only byte or short types here");
2111   int bit_width = width * 8;
2112   jint val = con;
2113   val &= (1 << bit_width) - 1;  // mask off sign bits
2114   while(bit_width < 32) {
2115     val |= (val << bit_width);
2116     bit_width <<= 1;
2117   }
2118   return val;
2119 }
2120 
2121 static inline jlong replicate8_imm(int con, int width) {
2122   // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
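  // For example, replicate8_imm(0x12, 1) returns 0x1212121212121212.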
2123   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
2124   int bit_width = width * 8;
2125   jlong val = con;
2126   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
2127   while(bit_width < 64) {
2128     val |= (val << bit_width);
2129     bit_width <<= 1;
2130   }
2131   return val;
2132 }
2133 
2134 #ifndef PRODUCT
2135   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2136     st->print("nop \t# %d bytes pad for loops and calls", _count);
2137   }
2138 #endif
2139 
2140   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2141     C2_MacroAssembler _masm(&cbuf);
2142     __ nop(_count);
2143   }
2144 
2145   uint MachNopNode::size(PhaseRegAlloc*) const {
2146     return _count;
2147   }
2148 
2149 #ifndef PRODUCT
2150   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
2151     st->print("# breakpoint");
2152   }
2153 #endif
2154 
2155   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
2156     C2_MacroAssembler _masm(&cbuf);
2157     __ int3();
2158   }
2159 
2160   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
2161     return MachNode::size(ra_);
2162   }
2163 
2164 %}
2165 
2166 encode %{
2167 
2168   enc_class call_epilog %{
2169     if (VerifyStackAtCalls) {
2170       // Check that stack depth is unchanged: find majik cookie on stack
2171       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2172       C2_MacroAssembler _masm(&cbuf);
2173       Label L;
2174       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2175       __ jccb(Assembler::equal, L);
2176       // Die if stack mismatch
2177       __ int3();
2178       __ bind(L);
2179     }
2180   %}
2181 
2182 %}
2183 
2184 
2185 //----------OPERANDS-----------------------------------------------------------
2186 // Operand definitions must precede instruction definitions for correct parsing
2187 // in the ADLC because operands constitute user defined types which are used in
2188 // instruction definitions.
2189 
2190 // Vectors
2191 
2192 // Dummy generic vector class. Should be used for all vector operands.
2193 // Replaced with vec[SDXYZ] during post-selection pass.
2194 operand vec() %{
2195   constraint(ALLOC_IN_RC(dynamic));
2196   match(VecX);
2197   match(VecY);
2198   match(VecZ);
2199   match(VecS);
2200   match(VecD);
2201 
2202   format %{ %}
2203   interface(REG_INTER);
2204 %}
2205 
2206 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
2207 // Replaced with legVec[SDXYZ] during post-selection cleanup.
2208 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
2209 // runtime code generation via reg_class_dynamic.
2210 operand legVec() %{
2211   constraint(ALLOC_IN_RC(dynamic));
2212   match(VecX);
2213   match(VecY);
2214   match(VecZ);
2215   match(VecS);
2216   match(VecD);
2217 
2218   format %{ %}
2219   interface(REG_INTER);
2220 %}
2221 
2222 // Replaces vec during post-selection cleanup. See above.
2223 operand vecS() %{
2224   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
2225   match(VecS);
2226 
2227   format %{ %}
2228   interface(REG_INTER);
2229 %}
2230 
2231 // Replaces legVec during post-selection cleanup. See above.
2232 operand legVecS() %{
2233   constraint(ALLOC_IN_RC(vectors_reg_legacy));
2234   match(VecS);
2235 
2236   format %{ %}
2237   interface(REG_INTER);
2238 %}
2239 
2240 // Replaces vec during post-selection cleanup. See above.
2241 operand vecD() %{
2242   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
2243   match(VecD);
2244 
2245   format %{ %}
2246   interface(REG_INTER);
2247 %}
2248 
2249 // Replaces legVec during post-selection cleanup. See above.
2250 operand legVecD() %{
2251   constraint(ALLOC_IN_RC(vectord_reg_legacy));
2252   match(VecD);
2253 
2254   format %{ %}
2255   interface(REG_INTER);
2256 %}
2257 
2258 // Replaces vec during post-selection cleanup. See above.
2259 operand vecX() %{
2260   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
2261   match(VecX);
2262 
2263   format %{ %}
2264   interface(REG_INTER);
2265 %}
2266 
2267 // Replaces legVec during post-selection cleanup. See above.
2268 operand legVecX() %{
2269   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
2270   match(VecX);
2271 
2272   format %{ %}
2273   interface(REG_INTER);
2274 %}
2275 
2276 // Replaces vec during post-selection cleanup. See above.
2277 operand vecY() %{
2278   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
2279   match(VecY);
2280 
2281   format %{ %}
2282   interface(REG_INTER);
2283 %}
2284 
2285 // Replaces legVec during post-selection cleanup. See above.
2286 operand legVecY() %{
2287   constraint(ALLOC_IN_RC(vectory_reg_legacy));
2288   match(VecY);
2289 
2290   format %{ %}
2291   interface(REG_INTER);
2292 %}
2293 
2294 // Replaces vec during post-selection cleanup. See above.
2295 operand vecZ() %{
2296   constraint(ALLOC_IN_RC(vectorz_reg));
2297   match(VecZ);
2298 
2299   format %{ %}
2300   interface(REG_INTER);
2301 %}
2302 
2303 // Replaces legVec during post-selection cleanup. See above.
2304 operand legVecZ() %{
2305   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
2306   match(VecZ);
2307 
2308   format %{ %}
2309   interface(REG_INTER);
2310 %}
2311 
2312 // Comparison Code for FP conditional move
2313 operand cmpOp_vcmppd() %{
2314   match(Bool);
2315 
2316   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
2317             n->as_Bool()->_test._test != BoolTest::no_overflow);
2318   format %{ "" %}
2319   interface(COND_INTER) %{
2320     equal        (0x0, "eq");
2321     less         (0x1, "lt");
2322     less_equal   (0x2, "le");
2323     not_equal    (0xC, "ne");
2324     greater_equal(0xD, "ge");
2325     greater      (0xE, "gt");
2326     // TODO: the ADLC fails to compile without the next two lines; it reports:
2327     // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
2328     // equal' for overflow.
2329     overflow     (0x20, "o");  // not really supported by the instruction
2330     no_overflow  (0x21, "no"); // not really supported by the instruction
2331   %}
2332 %}
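
// Note: the encodings above correspond to the AVX compare predicate immediates
// used by vcmppd/vcmpps (EQ_OQ = 0x0, LT_OS = 0x1, LE_OS = 0x2, NEQ_OQ = 0xC,
// GE_OS = 0xD, GT_OS = 0xE). The overflow/no_overflow entries are dummy values
// that only satisfy the ADLC and are ruled out by the predicate on this operand.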
2333 
2334 
2335 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2336 
2337 // ============================================================================
2338 
2339 instruct ShouldNotReachHere() %{
2340   match(Halt);
2341   format %{ "stop\t# ShouldNotReachHere" %}
2342   ins_encode %{
2343     if (is_reachable()) {
2344       __ stop(_halt_reason);
2345     }
2346   %}
2347   ins_pipe(pipe_slow);
2348 %}
2349 
2350 // =================================EVEX special===============================
2351 
2352 instruct setMask(rRegI dst, rRegI src) %{
2353   predicate(Matcher::has_predicated_vectors());
2354   match(Set dst (SetVectMaskI  src));
2355   effect(TEMP dst);
2356   format %{ "setvectmask   $dst, $src" %}
2357   ins_encode %{
2358     __ setvectmask($dst$$Register, $src$$Register);
2359   %}
2360   ins_pipe(pipe_slow);
2361 %}
2362 
2363 // ============================================================================
2364 
2365 instruct addF_reg(regF dst, regF src) %{
2366   predicate((UseSSE>=1) && (UseAVX == 0));
2367   match(Set dst (AddF dst src));
2368 
2369   format %{ "addss   $dst, $src" %}
2370   ins_cost(150);
2371   ins_encode %{
2372     __ addss($dst$$XMMRegister, $src$$XMMRegister);
2373   %}
2374   ins_pipe(pipe_slow);
2375 %}
2376 
2377 instruct addF_mem(regF dst, memory src) %{
2378   predicate((UseSSE>=1) && (UseAVX == 0));
2379   match(Set dst (AddF dst (LoadF src)));
2380 
2381   format %{ "addss   $dst, $src" %}
2382   ins_cost(150);
2383   ins_encode %{
2384     __ addss($dst$$XMMRegister, $src$$Address);
2385   %}
2386   ins_pipe(pipe_slow);
2387 %}
2388 
2389 instruct addF_imm(regF dst, immF con) %{
2390   predicate((UseSSE>=1) && (UseAVX == 0));
2391   match(Set dst (AddF dst con));
2392   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2393   ins_cost(150);
2394   ins_encode %{
2395     __ addss($dst$$XMMRegister, $constantaddress($con));
2396   %}
2397   ins_pipe(pipe_slow);
2398 %}
2399 
2400 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
2401   predicate(UseAVX > 0);
2402   match(Set dst (AddF src1 src2));
2403 
2404   format %{ "vaddss  $dst, $src1, $src2" %}
2405   ins_cost(150);
2406   ins_encode %{
2407     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2408   %}
2409   ins_pipe(pipe_slow);
2410 %}
2411 
2412 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
2413   predicate(UseAVX > 0);
2414   match(Set dst (AddF src1 (LoadF src2)));
2415 
2416   format %{ "vaddss  $dst, $src1, $src2" %}
2417   ins_cost(150);
2418   ins_encode %{
2419     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2420   %}
2421   ins_pipe(pipe_slow);
2422 %}
2423 
2424 instruct addF_reg_imm(regF dst, regF src, immF con) %{
2425   predicate(UseAVX > 0);
2426   match(Set dst (AddF src con));
2427 
2428   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2429   ins_cost(150);
2430   ins_encode %{
2431     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2432   %}
2433   ins_pipe(pipe_slow);
2434 %}
2435 
2436 instruct addD_reg(regD dst, regD src) %{
2437   predicate((UseSSE>=2) && (UseAVX == 0));
2438   match(Set dst (AddD dst src));
2439 
2440   format %{ "addsd   $dst, $src" %}
2441   ins_cost(150);
2442   ins_encode %{
2443     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
2444   %}
2445   ins_pipe(pipe_slow);
2446 %}
2447 
2448 instruct addD_mem(regD dst, memory src) %{
2449   predicate((UseSSE>=2) && (UseAVX == 0));
2450   match(Set dst (AddD dst (LoadD src)));
2451 
2452   format %{ "addsd   $dst, $src" %}
2453   ins_cost(150);
2454   ins_encode %{
2455     __ addsd($dst$$XMMRegister, $src$$Address);
2456   %}
2457   ins_pipe(pipe_slow);
2458 %}
2459 
2460 instruct addD_imm(regD dst, immD con) %{
2461   predicate((UseSSE>=2) && (UseAVX == 0));
2462   match(Set dst (AddD dst con));
2463   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2464   ins_cost(150);
2465   ins_encode %{
2466     __ addsd($dst$$XMMRegister, $constantaddress($con));
2467   %}
2468   ins_pipe(pipe_slow);
2469 %}
2470 
2471 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
2472   predicate(UseAVX > 0);
2473   match(Set dst (AddD src1 src2));
2474 
2475   format %{ "vaddsd  $dst, $src1, $src2" %}
2476   ins_cost(150);
2477   ins_encode %{
2478     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2479   %}
2480   ins_pipe(pipe_slow);
2481 %}
2482 
2483 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
2484   predicate(UseAVX > 0);
2485   match(Set dst (AddD src1 (LoadD src2)));
2486 
2487   format %{ "vaddsd  $dst, $src1, $src2" %}
2488   ins_cost(150);
2489   ins_encode %{
2490     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2491   %}
2492   ins_pipe(pipe_slow);
2493 %}
2494 
2495 instruct addD_reg_imm(regD dst, regD src, immD con) %{
2496   predicate(UseAVX > 0);
2497   match(Set dst (AddD src con));
2498 
2499   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2500   ins_cost(150);
2501   ins_encode %{
2502     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2503   %}
2504   ins_pipe(pipe_slow);
2505 %}
2506 
2507 instruct subF_reg(regF dst, regF src) %{
2508   predicate((UseSSE>=1) && (UseAVX == 0));
2509   match(Set dst (SubF dst src));
2510 
2511   format %{ "subss   $dst, $src" %}
2512   ins_cost(150);
2513   ins_encode %{
2514     __ subss($dst$$XMMRegister, $src$$XMMRegister);
2515   %}
2516   ins_pipe(pipe_slow);
2517 %}
2518 
2519 instruct subF_mem(regF dst, memory src) %{
2520   predicate((UseSSE>=1) && (UseAVX == 0));
2521   match(Set dst (SubF dst (LoadF src)));
2522 
2523   format %{ "subss   $dst, $src" %}
2524   ins_cost(150);
2525   ins_encode %{
2526     __ subss($dst$$XMMRegister, $src$$Address);
2527   %}
2528   ins_pipe(pipe_slow);
2529 %}
2530 
2531 instruct subF_imm(regF dst, immF con) %{
2532   predicate((UseSSE>=1) && (UseAVX == 0));
2533   match(Set dst (SubF dst con));
2534   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2535   ins_cost(150);
2536   ins_encode %{
2537     __ subss($dst$$XMMRegister, $constantaddress($con));
2538   %}
2539   ins_pipe(pipe_slow);
2540 %}
2541 
2542 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
2543   predicate(UseAVX > 0);
2544   match(Set dst (SubF src1 src2));
2545 
2546   format %{ "vsubss  $dst, $src1, $src2" %}
2547   ins_cost(150);
2548   ins_encode %{
2549     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2550   %}
2551   ins_pipe(pipe_slow);
2552 %}
2553 
2554 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
2555   predicate(UseAVX > 0);
2556   match(Set dst (SubF src1 (LoadF src2)));
2557 
2558   format %{ "vsubss  $dst, $src1, $src2" %}
2559   ins_cost(150);
2560   ins_encode %{
2561     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2562   %}
2563   ins_pipe(pipe_slow);
2564 %}
2565 
2566 instruct subF_reg_imm(regF dst, regF src, immF con) %{
2567   predicate(UseAVX > 0);
2568   match(Set dst (SubF src con));
2569 
2570   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2571   ins_cost(150);
2572   ins_encode %{
2573     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2574   %}
2575   ins_pipe(pipe_slow);
2576 %}
2577 
2578 instruct subD_reg(regD dst, regD src) %{
2579   predicate((UseSSE>=2) && (UseAVX == 0));
2580   match(Set dst (SubD dst src));
2581 
2582   format %{ "subsd   $dst, $src" %}
2583   ins_cost(150);
2584   ins_encode %{
2585     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
2586   %}
2587   ins_pipe(pipe_slow);
2588 %}
2589 
2590 instruct subD_mem(regD dst, memory src) %{
2591   predicate((UseSSE>=2) && (UseAVX == 0));
2592   match(Set dst (SubD dst (LoadD src)));
2593 
2594   format %{ "subsd   $dst, $src" %}
2595   ins_cost(150);
2596   ins_encode %{
2597     __ subsd($dst$$XMMRegister, $src$$Address);
2598   %}
2599   ins_pipe(pipe_slow);
2600 %}
2601 
2602 instruct subD_imm(regD dst, immD con) %{
2603   predicate((UseSSE>=2) && (UseAVX == 0));
2604   match(Set dst (SubD dst con));
2605   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2606   ins_cost(150);
2607   ins_encode %{
2608     __ subsd($dst$$XMMRegister, $constantaddress($con));
2609   %}
2610   ins_pipe(pipe_slow);
2611 %}
2612 
2613 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
2614   predicate(UseAVX > 0);
2615   match(Set dst (SubD src1 src2));
2616 
2617   format %{ "vsubsd  $dst, $src1, $src2" %}
2618   ins_cost(150);
2619   ins_encode %{
2620     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2621   %}
2622   ins_pipe(pipe_slow);
2623 %}
2624 
2625 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
2626   predicate(UseAVX > 0);
2627   match(Set dst (SubD src1 (LoadD src2)));
2628 
2629   format %{ "vsubsd  $dst, $src1, $src2" %}
2630   ins_cost(150);
2631   ins_encode %{
2632     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2633   %}
2634   ins_pipe(pipe_slow);
2635 %}
2636 
2637 instruct subD_reg_imm(regD dst, regD src, immD con) %{
2638   predicate(UseAVX > 0);
2639   match(Set dst (SubD src con));
2640 
2641   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2642   ins_cost(150);
2643   ins_encode %{
2644     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2645   %}
2646   ins_pipe(pipe_slow);
2647 %}
2648 
2649 instruct mulF_reg(regF dst, regF src) %{
2650   predicate((UseSSE>=1) && (UseAVX == 0));
2651   match(Set dst (MulF dst src));
2652 
2653   format %{ "mulss   $dst, $src" %}
2654   ins_cost(150);
2655   ins_encode %{
2656     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
2657   %}
2658   ins_pipe(pipe_slow);
2659 %}
2660 
2661 instruct mulF_mem(regF dst, memory src) %{
2662   predicate((UseSSE>=1) && (UseAVX == 0));
2663   match(Set dst (MulF dst (LoadF src)));
2664 
2665   format %{ "mulss   $dst, $src" %}
2666   ins_cost(150);
2667   ins_encode %{
2668     __ mulss($dst$$XMMRegister, $src$$Address);
2669   %}
2670   ins_pipe(pipe_slow);
2671 %}
2672 
2673 instruct mulF_imm(regF dst, immF con) %{
2674   predicate((UseSSE>=1) && (UseAVX == 0));
2675   match(Set dst (MulF dst con));
2676   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2677   ins_cost(150);
2678   ins_encode %{
2679     __ mulss($dst$$XMMRegister, $constantaddress($con));
2680   %}
2681   ins_pipe(pipe_slow);
2682 %}
2683 
2684 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
2685   predicate(UseAVX > 0);
2686   match(Set dst (MulF src1 src2));
2687 
2688   format %{ "vmulss  $dst, $src1, $src2" %}
2689   ins_cost(150);
2690   ins_encode %{
2691     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2692   %}
2693   ins_pipe(pipe_slow);
2694 %}
2695 
2696 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
2697   predicate(UseAVX > 0);
2698   match(Set dst (MulF src1 (LoadF src2)));
2699 
2700   format %{ "vmulss  $dst, $src1, $src2" %}
2701   ins_cost(150);
2702   ins_encode %{
2703     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2704   %}
2705   ins_pipe(pipe_slow);
2706 %}
2707 
2708 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
2709   predicate(UseAVX > 0);
2710   match(Set dst (MulF src con));
2711 
2712   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2713   ins_cost(150);
2714   ins_encode %{
2715     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2716   %}
2717   ins_pipe(pipe_slow);
2718 %}
2719 
2720 instruct mulD_reg(regD dst, regD src) %{
2721   predicate((UseSSE>=2) && (UseAVX == 0));
2722   match(Set dst (MulD dst src));
2723 
2724   format %{ "mulsd   $dst, $src" %}
2725   ins_cost(150);
2726   ins_encode %{
2727     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
2728   %}
2729   ins_pipe(pipe_slow);
2730 %}
2731 
2732 instruct mulD_mem(regD dst, memory src) %{
2733   predicate((UseSSE>=2) && (UseAVX == 0));
2734   match(Set dst (MulD dst (LoadD src)));
2735 
2736   format %{ "mulsd   $dst, $src" %}
2737   ins_cost(150);
2738   ins_encode %{
2739     __ mulsd($dst$$XMMRegister, $src$$Address);
2740   %}
2741   ins_pipe(pipe_slow);
2742 %}
2743 
2744 instruct mulD_imm(regD dst, immD con) %{
2745   predicate((UseSSE>=2) && (UseAVX == 0));
2746   match(Set dst (MulD dst con));
2747   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2748   ins_cost(150);
2749   ins_encode %{
2750     __ mulsd($dst$$XMMRegister, $constantaddress($con));
2751   %}
2752   ins_pipe(pipe_slow);
2753 %}
2754 
2755 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
2756   predicate(UseAVX > 0);
2757   match(Set dst (MulD src1 src2));
2758 
2759   format %{ "vmulsd  $dst, $src1, $src2" %}
2760   ins_cost(150);
2761   ins_encode %{
2762     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2763   %}
2764   ins_pipe(pipe_slow);
2765 %}
2766 
2767 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
2768   predicate(UseAVX > 0);
2769   match(Set dst (MulD src1 (LoadD src2)));
2770 
2771   format %{ "vmulsd  $dst, $src1, $src2" %}
2772   ins_cost(150);
2773   ins_encode %{
2774     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2775   %}
2776   ins_pipe(pipe_slow);
2777 %}
2778 
2779 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
2780   predicate(UseAVX > 0);
2781   match(Set dst (MulD src con));
2782 
2783   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2784   ins_cost(150);
2785   ins_encode %{
2786     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2787   %}
2788   ins_pipe(pipe_slow);
2789 %}
2790 
2791 instruct divF_reg(regF dst, regF src) %{
2792   predicate((UseSSE>=1) && (UseAVX == 0));
2793   match(Set dst (DivF dst src));
2794 
2795   format %{ "divss   $dst, $src" %}
2796   ins_cost(150);
2797   ins_encode %{
2798     __ divss($dst$$XMMRegister, $src$$XMMRegister);
2799   %}
2800   ins_pipe(pipe_slow);
2801 %}
2802 
2803 instruct divF_mem(regF dst, memory src) %{
2804   predicate((UseSSE>=1) && (UseAVX == 0));
2805   match(Set dst (DivF dst (LoadF src)));
2806 
2807   format %{ "divss   $dst, $src" %}
2808   ins_cost(150);
2809   ins_encode %{
2810     __ divss($dst$$XMMRegister, $src$$Address);
2811   %}
2812   ins_pipe(pipe_slow);
2813 %}
2814 
2815 instruct divF_imm(regF dst, immF con) %{
2816   predicate((UseSSE>=1) && (UseAVX == 0));
2817   match(Set dst (DivF dst con));
2818   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2819   ins_cost(150);
2820   ins_encode %{
2821     __ divss($dst$$XMMRegister, $constantaddress($con));
2822   %}
2823   ins_pipe(pipe_slow);
2824 %}
2825 
2826 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
2827   predicate(UseAVX > 0);
2828   match(Set dst (DivF src1 src2));
2829 
2830   format %{ "vdivss  $dst, $src1, $src2" %}
2831   ins_cost(150);
2832   ins_encode %{
2833     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2834   %}
2835   ins_pipe(pipe_slow);
2836 %}
2837 
2838 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
2839   predicate(UseAVX > 0);
2840   match(Set dst (DivF src1 (LoadF src2)));
2841 
2842   format %{ "vdivss  $dst, $src1, $src2" %}
2843   ins_cost(150);
2844   ins_encode %{
2845     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2846   %}
2847   ins_pipe(pipe_slow);
2848 %}
2849 
2850 instruct divF_reg_imm(regF dst, regF src, immF con) %{
2851   predicate(UseAVX > 0);
2852   match(Set dst (DivF src con));
2853 
2854   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2855   ins_cost(150);
2856   ins_encode %{
2857     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2858   %}
2859   ins_pipe(pipe_slow);
2860 %}
2861 
2862 instruct divD_reg(regD dst, regD src) %{
2863   predicate((UseSSE>=2) && (UseAVX == 0));
2864   match(Set dst (DivD dst src));
2865 
2866   format %{ "divsd   $dst, $src" %}
2867   ins_cost(150);
2868   ins_encode %{
2869     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
2870   %}
2871   ins_pipe(pipe_slow);
2872 %}
2873 
2874 instruct divD_mem(regD dst, memory src) %{
2875   predicate((UseSSE>=2) && (UseAVX == 0));
2876   match(Set dst (DivD dst (LoadD src)));
2877 
2878   format %{ "divsd   $dst, $src" %}
2879   ins_cost(150);
2880   ins_encode %{
2881     __ divsd($dst$$XMMRegister, $src$$Address);
2882   %}
2883   ins_pipe(pipe_slow);
2884 %}
2885 
2886 instruct divD_imm(regD dst, immD con) %{
2887   predicate((UseSSE>=2) && (UseAVX == 0));
2888   match(Set dst (DivD dst con));
2889   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2890   ins_cost(150);
2891   ins_encode %{
2892     __ divsd($dst$$XMMRegister, $constantaddress($con));
2893   %}
2894   ins_pipe(pipe_slow);
2895 %}
2896 
2897 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
2898   predicate(UseAVX > 0);
2899   match(Set dst (DivD src1 src2));
2900 
2901   format %{ "vdivsd  $dst, $src1, $src2" %}
2902   ins_cost(150);
2903   ins_encode %{
2904     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2905   %}
2906   ins_pipe(pipe_slow);
2907 %}
2908 
2909 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
2910   predicate(UseAVX > 0);
2911   match(Set dst (DivD src1 (LoadD src2)));
2912 
2913   format %{ "vdivsd  $dst, $src1, $src2" %}
2914   ins_cost(150);
2915   ins_encode %{
2916     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2917   %}
2918   ins_pipe(pipe_slow);
2919 %}
2920 
2921 instruct divD_reg_imm(regD dst, regD src, immD con) %{
2922   predicate(UseAVX > 0);
2923   match(Set dst (DivD src con));
2924 
2925   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2926   ins_cost(150);
2927   ins_encode %{
2928     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2929   %}
2930   ins_pipe(pipe_slow);
2931 %}
2932 
2933 instruct absF_reg(regF dst) %{
2934   predicate((UseSSE>=1) && (UseAVX == 0));
2935   match(Set dst (AbsF dst));
2936   ins_cost(150);
2937   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
2938   ins_encode %{
2939     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
2940   %}
2941   ins_pipe(pipe_slow);
2942 %}
2943 
2944 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
2945   predicate(UseAVX > 0);
2946   match(Set dst (AbsF src));
2947   ins_cost(150);
2948   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2949   ins_encode %{
2950     int vector_len = 0;
2951     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2952               ExternalAddress(float_signmask()), vector_len);
2953   %}
2954   ins_pipe(pipe_slow);
2955 %}
2956 
2957 instruct absD_reg(regD dst) %{
2958   predicate((UseSSE>=2) && (UseAVX == 0));
2959   match(Set dst (AbsD dst));
2960   ins_cost(150);
2961   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
2962             "# abs double by sign masking" %}
2963   ins_encode %{
2964     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
2965   %}
2966   ins_pipe(pipe_slow);
2967 %}
2968 
2969 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
2970   predicate(UseAVX > 0);
2971   match(Set dst (AbsD src));
2972   ins_cost(150);
2973   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
2974             "# abs double by sign masking" %}
2975   ins_encode %{
2976     int vector_len = 0;
2977     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2978               ExternalAddress(double_signmask()), vector_len);
2979   %}
2980   ins_pipe(pipe_slow);
2981 %}
2982 
2983 instruct negF_reg(regF dst) %{
2984   predicate((UseSSE>=1) && (UseAVX == 0));
2985   match(Set dst (NegF dst));
2986   ins_cost(150);
2987   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
2988   ins_encode %{
2989     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
2990   %}
2991   ins_pipe(pipe_slow);
2992 %}
2993 
2994 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
2995   predicate(UseAVX > 0);
2996   match(Set dst (NegF src));
2997   ins_cost(150);
2998   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
2999   ins_encode %{
3000     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
3001                  ExternalAddress(float_signflip()));
3002   %}
3003   ins_pipe(pipe_slow);
3004 %}
3005 
3006 instruct negD_reg(regD dst) %{
3007   predicate((UseSSE>=2) && (UseAVX == 0));
3008   match(Set dst (NegD dst));
3009   ins_cost(150);
3010   format %{ "xorpd   $dst, [0x8000000000000000]\t"
3011             "# neg double by sign flipping" %}
3012   ins_encode %{
3013     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
3014   %}
3015   ins_pipe(pipe_slow);
3016 %}
3017 
3018 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
3019   predicate(UseAVX > 0);
3020   match(Set dst (NegD src));
3021   ins_cost(150);
3022   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
3023             "# neg double by sign flipping" %}
3024   ins_encode %{
3025     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
3026                  ExternalAddress(double_signflip()));
3027   %}
3028   ins_pipe(pipe_slow);
3029 %}
3030 
3031 instruct sqrtF_reg(regF dst, regF src) %{
3032   predicate(UseSSE>=1);
3033   match(Set dst (SqrtF src));
3034 
3035   format %{ "sqrtss  $dst, $src" %}
3036   ins_cost(150);
3037   ins_encode %{
3038     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
3039   %}
3040   ins_pipe(pipe_slow);
3041 %}
3042 
3043 instruct sqrtF_mem(regF dst, memory src) %{
3044   predicate(UseSSE>=1);
3045   match(Set dst (SqrtF (LoadF src)));
3046 
3047   format %{ "sqrtss  $dst, $src" %}
3048   ins_cost(150);
3049   ins_encode %{
3050     __ sqrtss($dst$$XMMRegister, $src$$Address);
3051   %}
3052   ins_pipe(pipe_slow);
3053 %}
3054 
3055 instruct sqrtF_imm(regF dst, immF con) %{
3056   predicate(UseSSE>=1);
3057   match(Set dst (SqrtF con));
3058 
3059   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3060   ins_cost(150);
3061   ins_encode %{
3062     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
3063   %}
3064   ins_pipe(pipe_slow);
3065 %}
3066 
3067 instruct sqrtD_reg(regD dst, regD src) %{
3068   predicate(UseSSE>=2);
3069   match(Set dst (SqrtD src));
3070 
3071   format %{ "sqrtsd  $dst, $src" %}
3072   ins_cost(150);
3073   ins_encode %{
3074     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
3075   %}
3076   ins_pipe(pipe_slow);
3077 %}
3078 
3079 instruct sqrtD_mem(regD dst, memory src) %{
3080   predicate(UseSSE>=2);
3081   match(Set dst (SqrtD (LoadD src)));
3082 
3083   format %{ "sqrtsd  $dst, $src" %}
3084   ins_cost(150);
3085   ins_encode %{
3086     __ sqrtsd($dst$$XMMRegister, $src$$Address);
3087   %}
3088   ins_pipe(pipe_slow);
3089 %}
3090 
3091 instruct sqrtD_imm(regD dst, immD con) %{
3092   predicate(UseSSE>=2);
3093   match(Set dst (SqrtD con));
3094   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3095   ins_cost(150);
3096   ins_encode %{
3097     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
3098   %}
3099   ins_pipe(pipe_slow);
3100 %}
3101 
3102 
3103 #ifdef _LP64
3104 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
3105   match(Set dst (RoundDoubleMode src rmode));
3106   format %{ "roundsd $dst,$src" %}
3107   ins_cost(150);
3108   ins_encode %{
3109     assert(UseSSE >= 4, "required");
3110     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
3111   %}
3112   ins_pipe(pipe_slow);
3113 %}
3114 
3115 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{
3116   match(Set dst (RoundDoubleMode (LoadD src) rmode));
3117   format %{ "roundsd $dst,$src" %}
3118   ins_cost(150);
3119   ins_encode %{
3120     assert(UseSSE >= 4, "required");
3121     __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant);
3122   %}
3123   ins_pipe(pipe_slow);
3124 %}
3125 
3126 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{
3127   match(Set dst (RoundDoubleMode con rmode));
3128   effect(TEMP scratch_reg);
3129   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
3130   ins_cost(150);
3131   ins_encode %{
3132     assert(UseSSE >= 4, "required");
3133     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register);
3134   %}
3135   ins_pipe(pipe_slow);
3136 %}
3137 
3138 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
3139   predicate(n->as_Vector()->length() < 8);
3140   match(Set dst (RoundDoubleModeV src rmode));
3141   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
3142   ins_encode %{
3143     assert(UseAVX > 0, "required");
3144     int vector_len = vector_length_encoding(this);
3145     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
3146   %}
3147   ins_pipe( pipe_slow );
3148 %}
3149 
3150 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
3151   predicate(n->as_Vector()->length() == 8);
3152   match(Set dst (RoundDoubleModeV src rmode));
3153   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
3154   ins_encode %{
3155     assert(UseAVX > 2, "required");
3156     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
3157   %}
3158   ins_pipe( pipe_slow );
3159 %}
3160 
3161 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
3162   predicate(n->as_Vector()->length() < 8);
3163   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3164   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
3165   ins_encode %{
3166     assert(UseAVX > 0, "required");
3167     int vector_len = vector_length_encoding(this);
3168     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
3169   %}
3170   ins_pipe( pipe_slow );
3171 %}
3172 
3173 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
3174   predicate(n->as_Vector()->length() == 8);
3175   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3176   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
3177   ins_encode %{
3178     assert(UseAVX > 2, "required");
3179     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
3180   %}
3181   ins_pipe( pipe_slow );
3182 %}
3183 #endif // _LP64
3184 
3185 instruct onspinwait() %{
3186   match(OnSpinWait);
3187   ins_cost(200);
3188 
3189   format %{
3190     $$template
3191     $$emit$$"pause\t! membar_onspinwait"
3192   %}
3193   ins_encode %{
3194     __ pause();
3195   %}
3196   ins_pipe(pipe_slow);
3197 %}
3198 
3199 // a * b + c
3200 instruct fmaD_reg(regD a, regD b, regD c) %{
3201   predicate(UseFMA);
3202   match(Set c (FmaD  c (Binary a b)));
3203   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
3204   ins_cost(150);
3205   ins_encode %{
3206     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3207   %}
3208   ins_pipe( pipe_slow );
3209 %}
3210 
3211 // a * b + c
3212 instruct fmaF_reg(regF a, regF b, regF c) %{
3213   predicate(UseFMA);
3214   match(Set c (FmaF  c (Binary a b)));
3215   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
3216   ins_cost(150);
3217   ins_encode %{
3218     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3219   %}
3220   ins_pipe( pipe_slow );
3221 %}
3222 
3223 // ====================VECTOR INSTRUCTIONS=====================================
3224 
3225 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
3226 instruct MoveVec2Leg(legVec dst, vec src) %{
3227   match(Set dst src);
3228   format %{ "" %}
3229   ins_encode %{
3230     ShouldNotReachHere();
3231   %}
3232   ins_pipe( fpu_reg_reg );
3233 %}
3234 
3235 instruct MoveLeg2Vec(vec dst, legVec src) %{
3236   match(Set dst src);
3237   format %{ "" %}
3238   ins_encode %{
3239     ShouldNotReachHere();
3240   %}
3241   ins_pipe( fpu_reg_reg );
3242 %}
3243 
3244 // ============================================================================
3245 
3246 // Load vectors
3247 instruct loadV(vec dst, memory mem) %{
3248   match(Set dst (LoadVector mem));
3249   ins_cost(125);
3250   format %{ "load_vector $dst,$mem" %}
3251   ins_encode %{
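    // Pick the narrowest plain move for the vector size: scalar movdl/movq for
    // 4/8 bytes, (v)movdqu for 16/32 bytes, and an EVEX move for 64 bytes.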
3252     switch (vector_length_in_bytes(this)) {
3253       case  4: __ movdl    ($dst$$XMMRegister, $mem$$Address); break;
3254       case  8: __ movq     ($dst$$XMMRegister, $mem$$Address); break;
3255       case 16: __ movdqu   ($dst$$XMMRegister, $mem$$Address); break;
3256       case 32: __ vmovdqu  ($dst$$XMMRegister, $mem$$Address); break;
3257       case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break;
3258       default: ShouldNotReachHere();
3259     }
3260   %}
3261   ins_pipe( pipe_slow );
3262 %}
3263 
3264 // Store vectors using the generic vector operand pattern.
3265 instruct storeV(memory mem, vec src) %{
3266   match(Set mem (StoreVector mem src));
3267   ins_cost(145);
3268   format %{ "store_vector $mem,$src\n\t" %}
3269   ins_encode %{
3270     switch (vector_length_in_bytes(this, $src)) {
3271       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
3272       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
3273       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
3274       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
3275       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
3276       default: ShouldNotReachHere();
3277     }
3278   %}
3279   ins_pipe( pipe_slow );
3280 %}
3281 
3282 // ====================REPLICATE=======================================
3283 
3284 // Replicate byte scalar to be vector
3285 instruct ReplB_reg(vec dst, rRegI src) %{
3286   match(Set dst (ReplicateB src));
3287   format %{ "replicateB $dst,$src" %}
3288   ins_encode %{
3289     uint vlen = vector_length(this);
3290     if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
3291       assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
3292       int vlen_enc = vector_length_encoding(this);
3293       __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
3294     } else {
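      // Fallback without AVX512VL+BW: movdl places the byte in the low dword,
      // punpcklbw and pshuflw replicate it across the low 8 bytes, punpcklqdq
      // fills the 128-bit lane, and vinserti128_high copies it into the upper
      // half for 32-byte vectors.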
3295       __ movdl($dst$$XMMRegister, $src$$Register);
3296       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3297       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3298       if (vlen >= 16) {
3299         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3300         if (vlen >= 32) {
3301           assert(vlen == 32, "sanity");
3302           __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3303         }
3304       }
3305     }
3306   %}
3307   ins_pipe( pipe_slow );
3308 %}
3309 
3310 instruct ReplB_mem(vec dst, memory mem) %{
3311   predicate(VM_Version::supports_avx2());
3312   match(Set dst (ReplicateB (LoadB mem)));
3313   format %{ "replicateB $dst,$mem" %}
3314   ins_encode %{
3315     int vector_len = vector_length_encoding(this);
3316     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3317   %}
3318   ins_pipe( pipe_slow );
3319 %}
3320 
3321 instruct ReplB_imm(vec dst, immI con) %{
3322   match(Set dst (ReplicateB con));
3323   format %{ "replicateB $dst,$con" %}
3324   ins_encode %{
3325     uint vlen = vector_length(this);
3326     InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1));
3327     if (vlen == 4) {
3328       __ movdl($dst$$XMMRegister, const_addr);
3329     } else {
3330       __ movq($dst$$XMMRegister, const_addr);
3331       if (vlen >= 16) {
3332         if (VM_Version::supports_avx2()) {
3333           int vlen_enc = vector_length_encoding(this);
3334           __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3335         } else {
3336           assert(vlen == 16, "sanity");
3337           __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3338         }
3339       }
3340     }
3341   %}
3342   ins_pipe( pipe_slow );
3343 %}
3344 
3345 // Replicate byte scalar zero to be vector
3346 instruct ReplB_zero(vec dst, immI0 zero) %{
3347   match(Set dst (ReplicateB zero));
3348   format %{ "replicateB $dst,$zero" %}
3349   ins_encode %{
3350     uint vlen = vector_length(this);
3351     if (vlen <= 16) {
3352       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3353     } else {
3354       // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ).
3355       int vlen_enc = vector_length_encoding(this);
3356       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3357     }
3358   %}
3359   ins_pipe( fpu_reg_reg );
3360 %}
3361 
3362 // ====================ReplicateS=======================================
3363 
3364 instruct ReplS_reg(vec dst, rRegI src) %{
3365   match(Set dst (ReplicateS src));
3366   format %{ "replicateS $dst,$src" %}
3367   ins_encode %{
3368     uint vlen = vector_length(this);
3369     if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
3370       assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
3371       int vlen_enc = vector_length_encoding(this);
3372       __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
3373     } else {
3374       __ movdl($dst$$XMMRegister, $src$$Register);
3375       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3376       if (vlen >= 8) {
3377         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3378         if (vlen >= 16) {
3379           assert(vlen == 16, "sanity");
3380           __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3381         }
3382       }
3383     }
3384   %}
3385   ins_pipe( pipe_slow );
3386 %}
3387 
3388 instruct ReplS_mem(vec dst, memory mem) %{
3389   predicate(VM_Version::supports_avx2());
3390   match(Set dst (ReplicateS (LoadS mem)));
3391   format %{ "replicateS $dst,$mem" %}
3392   ins_encode %{
3393     int vlen_enc = vector_length_encoding(this);
3394     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
3395   %}
3396   ins_pipe( pipe_slow );
3397 %}
3398 
3399 instruct ReplS_imm(vec dst, immI con) %{
3400   match(Set dst (ReplicateS con));
3401   format %{ "replicateS $dst,$con" %}
3402   ins_encode %{
3403     uint vlen = vector_length(this);
3404     InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2));
3405     if (vlen == 2) {
3406       __ movdl($dst$$XMMRegister, const_addr);
3407     } else {
3408       __ movq($dst$$XMMRegister, const_addr);
3409       if (vlen >= 8) {
3410         if (VM_Version::supports_avx2()) {
3411           int vlen_enc = vector_length_encoding(this);
3412           __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3413         } else {
3414           assert(vlen == 8, "sanity");
3415           __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3416         }
3417       }
3418     }
3419   %}
3420   ins_pipe( fpu_reg_reg );
3421 %}
3422 
3423 instruct ReplS_zero(vec dst, immI0 zero) %{
3424   match(Set dst (ReplicateS zero));
3425   format %{ "replicateS $dst,$zero" %}
3426   ins_encode %{
3427     uint vlen = vector_length(this);
3428     if (vlen <= 8) {
3429       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3430     } else {
3431       int vlen_enc = vector_length_encoding(this);
3432       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3433     }
3434   %}
3435   ins_pipe( fpu_reg_reg );
3436 %}
3437 
3438 // ====================ReplicateI=======================================
3439 
3440 instruct ReplI_reg(vec dst, rRegI src) %{
3441   match(Set dst (ReplicateI src));
3442   format %{ "replicateI $dst,$src" %}
3443   ins_encode %{
3444     uint vlen = vector_length(this);
3445     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
3446       int vlen_enc = vector_length_encoding(this);
3447       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
3448     } else {
3449       __ movdl($dst$$XMMRegister, $src$$Register);
3450       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3451       if (vlen >= 8) {
3452         assert(vlen == 8, "sanity");
3453         __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3454       }
3455     }
3456   %}
3457   ins_pipe( pipe_slow );
3458 %}
3459 
3460 instruct ReplI_mem(vec dst, memory mem) %{
3461   match(Set dst (ReplicateI (LoadI mem)));
3462   format %{ "replicateI $dst,$mem" %}
3463   ins_encode %{
3464     uint vlen = vector_length(this);
3465     if (vlen <= 4) {
3466       __ movdl($dst$$XMMRegister, $mem$$Address);
3467       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3468     } else {
3469       assert(VM_Version::supports_avx2(), "sanity");
3470       int vector_len = vector_length_encoding(this);
3471       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
3472     }
3473   %}
3474   ins_pipe( pipe_slow );
3475 %}
3476 
3477 instruct ReplI_imm(vec dst, immI con) %{
3478   match(Set dst (ReplicateI con));
3479   format %{ "replicateI $dst,$con" %}
3480   ins_encode %{
3481     uint vlen = vector_length(this);
3482     InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4));
3483     if (vlen <= 4) {
3484       __ movq($dst$$XMMRegister, const_addr);
3485       if (vlen == 4) {
3486         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3487       }
3488     } else {
3489       assert(VM_Version::supports_avx2(), "sanity");
3490       int vector_len = vector_length_encoding(this);
3491       __ movq($dst$$XMMRegister, const_addr);
3492       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3493     }
3494   %}
3495   ins_pipe( pipe_slow );
3496 %}
3497 
3498 // Replicate integer (4 byte) scalar zero to be vector
3499 instruct ReplI_zero(vec dst, immI0 zero) %{
3500   match(Set dst (ReplicateI zero));
3501   format %{ "replicateI $dst,$zero" %}
3502   ins_encode %{
3503     uint vlen = vector_length(this);
3504     if (vlen <= 4) {
3505       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3506     } else {
3507       int vlen_enc = vector_length_encoding(this);
3508       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3509     }
3510   %}
3511   ins_pipe( fpu_reg_reg );
3512 %}
3513 
3514 instruct ReplI_M1(vec dst, immI_M1 con) %{
3515   predicate(UseAVX > 0);
3516   match(Set dst (ReplicateB con));
3517   match(Set dst (ReplicateS con));
3518   match(Set dst (ReplicateI con));
3519   effect(TEMP dst);
3520   format %{ "vallones $dst" %}
3521   ins_encode %{
3522     int vector_len = vector_length_encoding(this);
3523     __ vallones($dst$$XMMRegister, vector_len);
3524   %}
3525   ins_pipe( pipe_slow );
3526 %}
3527 
3528 // ====================ReplicateL=======================================
3529 
3530 #ifdef _LP64
3531 // Replicate long (8 byte) scalar to be vector
3532 instruct ReplL_reg(vec dst, rRegL src) %{
3533   match(Set dst (ReplicateL src));
3534   format %{ "replicateL $dst,$src" %}
3535   ins_encode %{
3536     uint vlen = vector_length(this);
3537     if (vlen == 2) {
3538       __ movdq($dst$$XMMRegister, $src$$Register);
3539       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3540     } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
3541       int vlen_enc = vector_length_encoding(this);
3542       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
3543     } else {
3544       assert(vlen == 4, "sanity");
3545       __ movdq($dst$$XMMRegister, $src$$Register);
3546       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3547       __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3548     }
3549   %}
3550   ins_pipe( pipe_slow );
3551 %}
3552 #else // _LP64
3553 // Replicate long (8 byte) scalar to be vector
3554 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
3555   predicate(n->as_Vector()->length() <= 4);
3556   match(Set dst (ReplicateL src));
3557   effect(TEMP dst, USE src, TEMP tmp);
3558   format %{ "replicateL $dst,$src" %}
3559   ins_encode %{
3560     uint vlen = vector_length(this);
3561     if (vlen == 2) {
3562       __ movdl($dst$$XMMRegister, $src$$Register);
3563       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3564       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3565       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3566     } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
3567       int vector_len = Assembler::AVX_256bit;
3568       __ movdl($dst$$XMMRegister, $src$$Register);
3569       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3570       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3571       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3572     } else {
3573       __ movdl($dst$$XMMRegister, $src$$Register);
3574       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3575       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3576       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3577       __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3578     }
3579   %}
3580   ins_pipe( pipe_slow );
3581 %}
3582 
3583 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
3584   predicate(n->as_Vector()->length() == 8);
3585   match(Set dst (ReplicateL src));
3586   effect(TEMP dst, USE src, TEMP tmp);
3587   format %{ "replicateL $dst,$src" %}
3588   ins_encode %{
3589     if (VM_Version::supports_avx512vl()) {
3590       __ movdl($dst$$XMMRegister, $src$$Register);
3591       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3592       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3593       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3594       __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3595       __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
3596     } else {
3597       int vector_len = Assembler::AVX_512bit;
3598       __ movdl($dst$$XMMRegister, $src$$Register);
3599       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3600       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3601       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3602     }
3603   %}
3604   ins_pipe( pipe_slow );
3605 %}
3606 #endif // _LP64
3607 
3608 instruct ReplL_mem(vec dst, memory mem) %{
3609   match(Set dst (ReplicateL (LoadL mem)));
3610   format %{ "replicateL $dst,$mem" %}
3611   ins_encode %{
3612     uint vlen = vector_length(this);
3613     if (vlen == 2) {
3614       __ movq($dst$$XMMRegister, $mem$$Address);
3615       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3616     } else {
3617       assert(VM_Version::supports_avx2(), "sanity");
3618       int vlen_enc = vector_length_encoding(this);
3619       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
3620     }
3621   %}
3622   ins_pipe( pipe_slow );
3623 %}
3624 
3625 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
3626 instruct ReplL_imm(vec dst, immL con) %{
3627   match(Set dst (ReplicateL con));
3628   format %{ "replicateL $dst,$con" %}
3629   ins_encode %{
3630     uint vlen = vector_length(this);
3631     InternalAddress const_addr = $constantaddress($con);
3632     if (vlen == 2) {
3633       __ movq($dst$$XMMRegister, const_addr);
3634       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3635     } else {
3636       assert(VM_Version::supports_avx2(), "sanity");
3637       int vlen_enc = vector_length_encoding(this);
3638       __ movq($dst$$XMMRegister, const_addr);
3639       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3640     }
3641   %}
3642   ins_pipe( pipe_slow );
3643 %}
3644 
3645 instruct ReplL_zero(vec dst, immL0 zero) %{
3646   match(Set dst (ReplicateL zero));
3647   format %{ "replicateL $dst,$zero" %}
3648   ins_encode %{
3649     int vlen = vector_length(this);
3650     if (vlen == 2) {
3651       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3652     } else {
3653       int vlen_enc = vector_length_encoding(this);
3654       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3655     }
3656   %}
3657   ins_pipe( fpu_reg_reg );
3658 %}
3659 
3660 instruct ReplL_M1(vec dst, immL_M1 con) %{
3661   predicate(UseAVX > 0);
3662   match(Set dst (ReplicateL con));
3663   effect(TEMP dst);
3664   format %{ "vallones $dst" %}
3665   ins_encode %{
3666     int vector_len = vector_length_encoding(this);
3667     __ vallones($dst$$XMMRegister, vector_len);
3668   %}
3669   ins_pipe( pipe_slow );
3670 %}
3671 
3672 // ====================ReplicateF=======================================
3673 
3674 instruct ReplF_reg(vec dst, vlRegF src) %{
3675   match(Set dst (ReplicateF src));
3676   format %{ "replicateF $dst,$src" %}
3677   ins_encode %{
3678     uint vlen = vector_length(this);
3679     if (vlen <= 4) {
3680       __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3681    } else if (VM_Version::supports_avx2()) {
3682       int vector_len = vector_length_encoding(this);
3683       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
3684     } else {
3685       assert(vlen == 8, "sanity");
3686       __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3687       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
3688     }
3689   %}
3690   ins_pipe( pipe_slow );
3691 %}
3692 
3693 instruct ReplF_mem(vec dst, memory mem) %{
3694   match(Set dst (ReplicateF (LoadF mem)));
3695   format %{ "replicateF $dst,$mem" %}
3696   ins_encode %{
3697     uint vlen = vector_length(this);
3698     if (vlen <= 4) {
3699       __ movdl($dst$$XMMRegister, $mem$$Address);
3700       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3701     } else {
3702       assert(VM_Version::supports_avx(), "sanity");
3703       int vector_len = vector_length_encoding(this);
3704       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
3705     }
3706   %}
3707   ins_pipe( pipe_slow );
3708 %}
3709 
3710 instruct ReplF_zero(vec dst, immF0 zero) %{
3711   match(Set dst (ReplicateF zero));
3712   format %{ "replicateF $dst,$zero" %}
3713   ins_encode %{
3714     uint vlen = vector_length(this);
3715     if (vlen <= 4) {
3716       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3717     } else {
3718       int vlen_enc = vector_length_encoding(this);
3719       __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ
3720     }
3721   %}
3722   ins_pipe( fpu_reg_reg );
3723 %}
3724 
3725 // ====================ReplicateD=======================================
3726 
3727 // Replicate double (8 bytes) scalar to be vector
3728 instruct ReplD_reg(vec dst, vlRegD src) %{
3729   match(Set dst (ReplicateD src));
3730   format %{ "replicateD $dst,$src" %}
3731   ins_encode %{
3732     uint vlen = vector_length(this);
3733     if (vlen == 2) {
3734       __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3735     } else if (VM_Version::supports_avx2()) {
3736       int vector_len = vector_length_encoding(this);
3737       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
3738     } else {
3739       assert(vlen == 4, "sanity");
3740       __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3741       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
3742     }
3743   %}
3744   ins_pipe( pipe_slow );
3745 %}
3746 
3747 instruct ReplD_mem(vec dst, memory mem) %{
3748   match(Set dst (ReplicateD (LoadD mem)));
3749   format %{ "replicateD $dst,$mem" %}
3750   ins_encode %{
3751     uint vlen = vector_length(this);
3752     if (vlen == 2) {
3753       __ movq($dst$$XMMRegister, $mem$$Address);
3754       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44);
3755     } else {
3756       assert(VM_Version::supports_avx(), "sanity");
3757       int vector_len = vector_length_encoding(this);
3758       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
3759     }
3760   %}
3761   ins_pipe( pipe_slow );
3762 %}
3763 
3764 instruct ReplD_zero(vec dst, immD0 zero) %{
3765   match(Set dst (ReplicateD zero));
3766   format %{ "replicateD $dst,$zero" %}
3767   ins_encode %{
3768     uint vlen = vector_length(this);
3769     if (vlen == 2) {
3770       __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
3771     } else {
3772       int vlen_enc = vector_length_encoding(this);
3773       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ
3774     }
3775   %}
3776   ins_pipe( fpu_reg_reg );
3777 %}
3778 
3779 // ====================REDUCTION ARITHMETIC=======================================
3780 // =======================Int Reduction==========================================
3781 
3782 instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
3783   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
3784             n->in(2)->bottom_type()->is_vect()->length() < 16);
3785   match(Set dst (AddReductionVI src1 src2));
3786   match(Set dst (MulReductionVI src1 src2));
3787   match(Set dst (AndReductionV  src1 src2));
3788   match(Set dst ( OrReductionV  src1 src2));
3789   match(Set dst (XorReductionV  src1 src2));
3790   effect(TEMP vtmp1, TEMP vtmp2);
3791   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
3792   ins_encode %{
3793     int opcode = this->ideal_Opcode();
3794     int vlen = vector_length(this, $src2);
3795     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3796   %}
3797   ins_pipe( pipe_slow );
3798 %}
3799 
3800 instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
3801   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
3802             n->in(2)->bottom_type()->is_vect()->length() == 16);
3803   match(Set dst (AddReductionVI src1 src2));
3804   match(Set dst (MulReductionVI src1 src2));
3805   match(Set dst (AndReductionV  src1 src2));
3806   match(Set dst ( OrReductionV  src1 src2));
3807   match(Set dst (XorReductionV  src1 src2));
3808   effect(TEMP vtmp1, TEMP vtmp2);
3809   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
3810   ins_encode %{
3811     int opcode = this->ideal_Opcode();
3812     int vlen = vector_length(this, $src2);
3813     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3814   %}
3815   ins_pipe( pipe_slow );
3816 %}
3817 
3818 // =======================Long Reduction==========================================
3819 
3820 #ifdef _LP64
3821 instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
3822   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
3823             n->in(2)->bottom_type()->is_vect()->length() < 8);
3824   match(Set dst (AddReductionVL src1 src2));
3825   match(Set dst (MulReductionVL src1 src2));
3826   match(Set dst (AndReductionV  src1 src2));
3827   match(Set dst ( OrReductionV  src1 src2));
3828   match(Set dst (XorReductionV  src1 src2));
3829   effect(TEMP vtmp1, TEMP vtmp2);
3830   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
3831   ins_encode %{
3832     int opcode = this->ideal_Opcode();
3833     int vlen = vector_length(this, $src2);
3834     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3835   %}
3836   ins_pipe( pipe_slow );
3837 %}
3838 
3839 instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
3840   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
3841             n->in(2)->bottom_type()->is_vect()->length() == 8);
3842   match(Set dst (AddReductionVL src1 src2));
3843   match(Set dst (MulReductionVL src1 src2));
3844   match(Set dst (AndReductionV  src1 src2));
3845   match(Set dst ( OrReductionV  src1 src2));
3846   match(Set dst (XorReductionV  src1 src2));
3847   effect(TEMP vtmp1, TEMP vtmp2);
3848   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
3849   ins_encode %{
3850     int opcode = this->ideal_Opcode();
3851     int vlen = vector_length(this, $src2);
3852     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3853   %}
3854   ins_pipe( pipe_slow );
3855 %}
3856 #endif // _LP64
3857 
3858 // =======================Float Reduction==========================================
3859 
3860 instruct reductionF128(regF dst, vec src, vec vtmp) %{
3861   predicate(n->in(2)->bottom_type()->is_vect()->length() <= 4);
3862   match(Set dst (AddReductionVF dst src));
3863   match(Set dst (MulReductionVF dst src));
3864   effect(TEMP dst, TEMP vtmp);
3865   format %{ "vector_reduction_fp  $dst,$src ; using $vtmp as TEMP" %}
3866   ins_encode %{
3867     int opcode = this->ideal_Opcode();
3868     int vlen = vector_length(this, $src);
3869     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
3870   %}
3871   ins_pipe( pipe_slow );
3872 %}
3873 
3874 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
3875   predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
3876   match(Set dst (AddReductionVF dst src));
3877   match(Set dst (MulReductionVF dst src));
3878   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
3879   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
3880   ins_encode %{
3881     int opcode = this->ideal_Opcode();
3882     int vlen = vector_length(this, $src);
3883     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3884   %}
3885   ins_pipe( pipe_slow );
3886 %}
3887 
3888 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
3889   predicate(n->in(2)->bottom_type()->is_vect()->length() == 16);
3890   match(Set dst (AddReductionVF dst src));
3891   match(Set dst (MulReductionVF dst src));
3892   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
3893   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
3894   ins_encode %{
3895     int opcode = this->ideal_Opcode();
3896     int vlen = vector_length(this, $src);
3897     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3898   %}
3899   ins_pipe( pipe_slow );
3900 %}
3901 
3902 // =======================Double Reduction==========================================
3903 
3904 instruct reduction2D(regD dst, vec src, vec vtmp) %{
3905   predicate(n->in(2)->bottom_type()->is_vect()->length() == 2);
3906   match(Set dst (AddReductionVD dst src));
3907   match(Set dst (MulReductionVD dst src));
3908   effect(TEMP dst, TEMP vtmp);
3909   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
3910   ins_encode %{
3911     int opcode = this->ideal_Opcode();
3912     int vlen = vector_length(this, $src);
3913     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
3914   %}
3915   ins_pipe( pipe_slow );
3916 %}
3917 
3918 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
3919   predicate(n->in(2)->bottom_type()->is_vect()->length() == 4);
3920   match(Set dst (AddReductionVD dst src));
3921   match(Set dst (MulReductionVD dst src));
3922   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
3923   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
3924   ins_encode %{
3925     int opcode = this->ideal_Opcode();
3926     int vlen = vector_length(this, $src);
3927     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3928   %}
3929   ins_pipe( pipe_slow );
3930 %}
3931 
3932 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
3933   predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
3934   match(Set dst (AddReductionVD dst src));
3935   match(Set dst (MulReductionVD dst src));
3936   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
3937   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
3938   ins_encode %{
3939     int opcode = this->ideal_Opcode();
3940     int vlen = vector_length(this, $src);
3941     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3942   %}
3943   ins_pipe( pipe_slow );
3944 %}
3945 
3946 // ====================VECTOR ARITHMETIC=======================================
3947 
3948 // --------------------------------- ADD --------------------------------------
3949 
3950 // Bytes vector add
3951 instruct vaddB(vec dst, vec src) %{
3952   predicate(UseAVX == 0);
3953   match(Set dst (AddVB dst src));
3954   format %{ "paddb   $dst,$src\t! add packedB" %}
3955   ins_encode %{
3956     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
3957   %}
3958   ins_pipe( pipe_slow );
3959 %}
3960 
3961 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
3962   predicate(UseAVX > 0);
3963   match(Set dst (AddVB src1 src2));
3964   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
3965   ins_encode %{
3966     int vector_len = vector_length_encoding(this);
3967     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
3968   %}
3969   ins_pipe( pipe_slow );
3970 %}
3971 
3972 instruct vaddB_mem(vec dst, vec src, memory mem) %{
3973   predicate(UseAVX > 0);
3974   match(Set dst (AddVB src (LoadVector mem)));
3975   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
3976   ins_encode %{
3977     int vector_len = vector_length_encoding(this);
3978     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
3979   %}
3980   ins_pipe( pipe_slow );
3981 %}
3982 
3983 // Shorts/Chars vector add
3984 instruct vaddS(vec dst, vec src) %{
3985   predicate(UseAVX == 0);
3986   match(Set dst (AddVS dst src));
3987   format %{ "paddw   $dst,$src\t! add packedS" %}
3988   ins_encode %{
3989     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
3990   %}
3991   ins_pipe( pipe_slow );
3992 %}
3993 
3994 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
3995   predicate(UseAVX > 0);
3996   match(Set dst (AddVS src1 src2));
3997   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
3998   ins_encode %{
3999     int vector_len = vector_length_encoding(this);
4000     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4001   %}
4002   ins_pipe( pipe_slow );
4003 %}
4004 
4005 instruct vaddS_mem(vec dst, vec src, memory mem) %{
4006   predicate(UseAVX > 0);
4007   match(Set dst (AddVS src (LoadVector mem)));
4008   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
4009   ins_encode %{
4010     int vector_len = vector_length_encoding(this);
4011     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4012   %}
4013   ins_pipe( pipe_slow );
4014 %}
4015 
4016 // Integers vector add
4017 instruct vaddI(vec dst, vec src) %{
4018   predicate(UseAVX == 0);
4019   match(Set dst (AddVI dst src));
4020   format %{ "paddd   $dst,$src\t! add packedI" %}
4021   ins_encode %{
4022     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
4023   %}
4024   ins_pipe( pipe_slow );
4025 %}
4026 
4027 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
4028   predicate(UseAVX > 0);
4029   match(Set dst (AddVI src1 src2));
4030   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
4031   ins_encode %{
4032     int vector_len = vector_length_encoding(this);
4033     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4034   %}
4035   ins_pipe( pipe_slow );
4036 %}
4037 
4038 
4039 instruct vaddI_mem(vec dst, vec src, memory mem) %{
4040   predicate(UseAVX > 0);
4041   match(Set dst (AddVI src (LoadVector mem)));
4042   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
4043   ins_encode %{
4044     int vector_len = vector_length_encoding(this);
4045     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4046   %}
4047   ins_pipe( pipe_slow );
4048 %}
4049 
4050 // Longs vector add
4051 instruct vaddL(vec dst, vec src) %{
4052   predicate(UseAVX == 0);
4053   match(Set dst (AddVL dst src));
4054   format %{ "paddq   $dst,$src\t! add packedL" %}
4055   ins_encode %{
4056     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
4057   %}
4058   ins_pipe( pipe_slow );
4059 %}
4060 
4061 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
4062   predicate(UseAVX > 0);
4063   match(Set dst (AddVL src1 src2));
4064   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
4065   ins_encode %{
4066     int vector_len = vector_length_encoding(this);
4067     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4068   %}
4069   ins_pipe( pipe_slow );
4070 %}
4071 
4072 instruct vaddL_mem(vec dst, vec src, memory mem) %{
4073   predicate(UseAVX > 0);
4074   match(Set dst (AddVL src (LoadVector mem)));
4075   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
4076   ins_encode %{
4077     int vector_len = vector_length_encoding(this);
4078     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4079   %}
4080   ins_pipe( pipe_slow );
4081 %}
4082 
4083 // Floats vector add
4084 instruct vaddF(vec dst, vec src) %{
4085   predicate(UseAVX == 0);
4086   match(Set dst (AddVF dst src));
4087   format %{ "addps   $dst,$src\t! add packedF" %}
4088   ins_encode %{
4089     __ addps($dst$$XMMRegister, $src$$XMMRegister);
4090   %}
4091   ins_pipe( pipe_slow );
4092 %}
4093 
4094 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
4095   predicate(UseAVX > 0);
4096   match(Set dst (AddVF src1 src2));
4097   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
4098   ins_encode %{
4099     int vector_len = vector_length_encoding(this);
4100     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4101   %}
4102   ins_pipe( pipe_slow );
4103 %}
4104 
4105 instruct vaddF_mem(vec dst, vec src, memory mem) %{
4106   predicate(UseAVX > 0);
4107   match(Set dst (AddVF src (LoadVector mem)));
4108   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
4109   ins_encode %{
4110     int vector_len = vector_length_encoding(this);
4111     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4112   %}
4113   ins_pipe( pipe_slow );
4114 %}
4115 
4116 // Doubles vector add
4117 instruct vaddD(vec dst, vec src) %{
4118   predicate(UseAVX == 0);
4119   match(Set dst (AddVD dst src));
4120   format %{ "addpd   $dst,$src\t! add packedD" %}
4121   ins_encode %{
4122     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
4123   %}
4124   ins_pipe( pipe_slow );
4125 %}
4126 
4127 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
4128   predicate(UseAVX > 0);
4129   match(Set dst (AddVD src1 src2));
4130   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
4131   ins_encode %{
4132     int vector_len = vector_length_encoding(this);
4133     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4134   %}
4135   ins_pipe( pipe_slow );
4136 %}
4137 
4138 instruct vaddD_mem(vec dst, vec src, memory mem) %{
4139   predicate(UseAVX > 0);
4140   match(Set dst (AddVD src (LoadVector mem)));
4141   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
4142   ins_encode %{
4143     int vector_len = vector_length_encoding(this);
4144     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4145   %}
4146   ins_pipe( pipe_slow );
4147 %}
4148 
4149 // --------------------------------- SUB --------------------------------------
4150 
4151 // Bytes vector sub
4152 instruct vsubB(vec dst, vec src) %{
4153   predicate(UseAVX == 0);
4154   match(Set dst (SubVB dst src));
4155   format %{ "psubb   $dst,$src\t! sub packedB" %}
4156   ins_encode %{
4157     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
4158   %}
4159   ins_pipe( pipe_slow );
4160 %}
4161 
4162 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
4163   predicate(UseAVX > 0);
4164   match(Set dst (SubVB src1 src2));
4165   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
4166   ins_encode %{
4167     int vector_len = vector_length_encoding(this);
4168     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4169   %}
4170   ins_pipe( pipe_slow );
4171 %}
4172 
4173 instruct vsubB_mem(vec dst, vec src, memory mem) %{
4174   predicate(UseAVX > 0);
4175   match(Set dst (SubVB src (LoadVector mem)));
4176   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
4177   ins_encode %{
4178     int vector_len = vector_length_encoding(this);
4179     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4180   %}
4181   ins_pipe( pipe_slow );
4182 %}
4183 
4184 // Shorts/Chars vector sub
4185 instruct vsubS(vec dst, vec src) %{
4186   predicate(UseAVX == 0);
4187   match(Set dst (SubVS dst src));
4188   format %{ "psubw   $dst,$src\t! sub packedS" %}
4189   ins_encode %{
4190     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
4191   %}
4192   ins_pipe( pipe_slow );
4193 %}
4194 
4195 
4196 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
4197   predicate(UseAVX > 0);
4198   match(Set dst (SubVS src1 src2));
4199   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
4200   ins_encode %{
4201     int vector_len = vector_length_encoding(this);
4202     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4203   %}
4204   ins_pipe( pipe_slow );
4205 %}
4206 
4207 instruct vsubS_mem(vec dst, vec src, memory mem) %{
4208   predicate(UseAVX > 0);
4209   match(Set dst (SubVS src (LoadVector mem)));
4210   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
4211   ins_encode %{
4212     int vector_len = vector_length_encoding(this);
4213     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4214   %}
4215   ins_pipe( pipe_slow );
4216 %}
4217 
4218 // Integers vector sub
4219 instruct vsubI(vec dst, vec src) %{
4220   predicate(UseAVX == 0);
4221   match(Set dst (SubVI dst src));
4222   format %{ "psubd   $dst,$src\t! sub packedI" %}
4223   ins_encode %{
4224     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
4225   %}
4226   ins_pipe( pipe_slow );
4227 %}
4228 
4229 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
4230   predicate(UseAVX > 0);
4231   match(Set dst (SubVI src1 src2));
4232   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
4233   ins_encode %{
4234     int vector_len = vector_length_encoding(this);
4235     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4236   %}
4237   ins_pipe( pipe_slow );
4238 %}
4239 
4240 instruct vsubI_mem(vec dst, vec src, memory mem) %{
4241   predicate(UseAVX > 0);
4242   match(Set dst (SubVI src (LoadVector mem)));
4243   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
4244   ins_encode %{
4245     int vector_len = vector_length_encoding(this);
4246     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4247   %}
4248   ins_pipe( pipe_slow );
4249 %}
4250 
4251 // Longs vector sub
4252 instruct vsubL(vec dst, vec src) %{
4253   predicate(UseAVX == 0);
4254   match(Set dst (SubVL dst src));
4255   format %{ "psubq   $dst,$src\t! sub packedL" %}
4256   ins_encode %{
4257     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
4258   %}
4259   ins_pipe( pipe_slow );
4260 %}
4261 
4262 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
4263   predicate(UseAVX > 0);
4264   match(Set dst (SubVL src1 src2));
4265   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
4266   ins_encode %{
4267     int vector_len = vector_length_encoding(this);
4268     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4269   %}
4270   ins_pipe( pipe_slow );
4271 %}
4272 
4273 
4274 instruct vsubL_mem(vec dst, vec src, memory mem) %{
4275   predicate(UseAVX > 0);
4276   match(Set dst (SubVL src (LoadVector mem)));
4277   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
4278   ins_encode %{
4279     int vector_len = vector_length_encoding(this);
4280     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4281   %}
4282   ins_pipe( pipe_slow );
4283 %}
4284 
4285 // Floats vector sub
4286 instruct vsubF(vec dst, vec src) %{
4287   predicate(UseAVX == 0);
4288   match(Set dst (SubVF dst src));
4289   format %{ "subps   $dst,$src\t! sub packedF" %}
4290   ins_encode %{
4291     __ subps($dst$$XMMRegister, $src$$XMMRegister);
4292   %}
4293   ins_pipe( pipe_slow );
4294 %}
4295 
4296 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
4297   predicate(UseAVX > 0);
4298   match(Set dst (SubVF src1 src2));
4299   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
4300   ins_encode %{
4301     int vector_len = vector_length_encoding(this);
4302     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4303   %}
4304   ins_pipe( pipe_slow );
4305 %}
4306 
4307 instruct vsubF_mem(vec dst, vec src, memory mem) %{
4308   predicate(UseAVX > 0);
4309   match(Set dst (SubVF src (LoadVector mem)));
4310   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
4311   ins_encode %{
4312     int vector_len = vector_length_encoding(this);
4313     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4314   %}
4315   ins_pipe( pipe_slow );
4316 %}
4317 
4318 // Doubles vector sub
4319 instruct vsubD(vec dst, vec src) %{
4320   predicate(UseAVX == 0);
4321   match(Set dst (SubVD dst src));
4322   format %{ "subpd   $dst,$src\t! sub packedD" %}
4323   ins_encode %{
4324     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
4325   %}
4326   ins_pipe( pipe_slow );
4327 %}
4328 
4329 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
4330   predicate(UseAVX > 0);
4331   match(Set dst (SubVD src1 src2));
4332   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
4333   ins_encode %{
4334     int vector_len = vector_length_encoding(this);
4335     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4336   %}
4337   ins_pipe( pipe_slow );
4338 %}
4339 
4340 instruct vsubD_mem(vec dst, vec src, memory mem) %{
4341   predicate(UseAVX > 0);
4342   match(Set dst (SubVD src (LoadVector mem)));
4343   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
4344   ins_encode %{
4345     int vector_len = vector_length_encoding(this);
4346     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4347   %}
4348   ins_pipe( pipe_slow );
4349 %}
4350 
4351 // --------------------------------- MUL --------------------------------------
4352 
4353 // Bytes vector mul
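     // Note: x86 has no packed byte multiply instruction, so MulVB is emulated
     // below: byte lanes are sign-extended to words (pmovsxbw/vpmovsxbw),
     // multiplied with pmullw/vpmullw, each word result is masked back to its
     // low byte (vector_short_to_byte_mask) and the words are repacked with
     // packuswb/vpackuswb. The wider forms split the vector into halves and
     // recombine the packed result (vpermq) at the end.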
4354 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
4355   predicate(n->as_Vector()->length() == 4 ||
4356             n->as_Vector()->length() == 8);
4357   match(Set dst (MulVB src1 src2));
4358   effect(TEMP dst, TEMP tmp, TEMP scratch);
4359   format %{"vector_mulB $dst,$src1,$src2" %}
4360   ins_encode %{
4361     assert(UseSSE > 3, "required");
4362     __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
4363     __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
4364     __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
4365     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4366     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
4367     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
4368   %}
4369   ins_pipe( pipe_slow );
4370 %}
4371 
4372 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
4373   predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
4374   match(Set dst (MulVB src1 src2));
4375   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
4376   format %{"vector_mulB $dst,$src1,$src2" %}
4377   ins_encode %{
4378     assert(UseSSE > 3, "required");
4379     __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister);
4380     __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
4381     __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister);
4382     __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE);
4383     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE);
4384     __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4385     __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
4386     __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister);
4387     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4388     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
4389     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
4390     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
4391   %}
4392   ins_pipe( pipe_slow );
4393 %}
4394 
4395 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
4396   predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
4397   match(Set dst (MulVB src1 src2));
4398   effect(TEMP dst, TEMP tmp, TEMP scratch);
4399   format %{"vector_mulB $dst,$src1,$src2" %}
4400   ins_encode %{
4401     int vector_len = Assembler::AVX_256bit;
4402     __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
4403     __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
4404     __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len);
4405     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4406     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
4407     __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
4408     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
4409   %}
4410   ins_pipe( pipe_slow );
4411 %}
4412 
4413 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
4414   predicate(n->as_Vector()->length() == 32);
4415   match(Set dst (MulVB src1 src2));
4416   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
4417   format %{"vector_mulB $dst,$src1,$src2" %}
4418   ins_encode %{
4419     assert(UseAVX > 1, "required");
4420     int vector_len = Assembler::AVX_256bit;
4421     __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
4422     __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
4423     __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
4424     __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4425     __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4426     __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
4427     __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
4428     __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4429     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4430     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4431     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4432     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4433     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len);
4434     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
4435   %}
4436   ins_pipe( pipe_slow );
4437 %}
4438 
4439 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
4440   predicate(n->as_Vector()->length() == 64);
4441   match(Set dst (MulVB src1 src2));
4442   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
4443   format %{"vector_mulB $dst,$src1,$src2" %}
4444   ins_encode %{
4445     assert(UseAVX > 2, "required");
4446     int vector_len = Assembler::AVX_512bit;
4447     __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
4448     __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
4449     __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
4450     __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4451     __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4452     __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
4453     __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
4454     __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4455     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4456     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4457     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4458     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4459     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4460     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
4461     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4462   %}
4463   ins_pipe( pipe_slow );
4464 %}
4465 
4466 // Shorts/Chars vector mul
4467 instruct vmulS(vec dst, vec src) %{
4468   predicate(UseAVX == 0);
4469   match(Set dst (MulVS dst src));
4470   format %{ "pmullw $dst,$src\t! mul packedS" %}
4471   ins_encode %{
4472     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
4473   %}
4474   ins_pipe( pipe_slow );
4475 %}
4476 
4477 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
4478   predicate(UseAVX > 0);
4479   match(Set dst (MulVS src1 src2));
4480   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
4481   ins_encode %{
4482     int vector_len = vector_length_encoding(this);
4483     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4484   %}
4485   ins_pipe( pipe_slow );
4486 %}
4487 
4488 instruct vmulS_mem(vec dst, vec src, memory mem) %{
4489   predicate(UseAVX > 0);
4490   match(Set dst (MulVS src (LoadVector mem)));
4491   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
4492   ins_encode %{
4493     int vector_len = vector_length_encoding(this);
4494     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4495   %}
4496   ins_pipe( pipe_slow );
4497 %}
4498 
4499 // Integers vector mul
4500 instruct vmulI(vec dst, vec src) %{
4501   predicate(UseAVX == 0);
4502   match(Set dst (MulVI dst src));
4503   format %{ "pmulld  $dst,$src\t! mul packedI" %}
4504   ins_encode %{
4505     assert(UseSSE > 3, "required");
4506     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
4507   %}
4508   ins_pipe( pipe_slow );
4509 %}
4510 
4511 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
4512   predicate(UseAVX > 0);
4513   match(Set dst (MulVI src1 src2));
4514   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
4515   ins_encode %{
4516     int vector_len = vector_length_encoding(this);
4517     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4518   %}
4519   ins_pipe( pipe_slow );
4520 %}
4521 
4522 instruct vmulI_mem(vec dst, vec src, memory mem) %{
4523   predicate(UseAVX > 0);
4524   match(Set dst (MulVI src (LoadVector mem)));
4525   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
4526   ins_encode %{
4527     int vector_len = vector_length_encoding(this);
4528     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4529   %}
4530   ins_pipe( pipe_slow );
4531 %}
4532 
4533 // Longs vector mul
4534 instruct vmulL_reg(vec dst, vec src1, vec src2) %{
4535   match(Set dst (MulVL src1 src2));
4536   format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %}
4537   ins_encode %{
4538     assert(UseAVX > 2, "required");
4539     int vector_len = vector_length_encoding(this);
4540     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4541   %}
4542   ins_pipe( pipe_slow );
4543 %}
4544 
4545 instruct vmulL_mem(vec dst, vec src, memory mem) %{
4546   match(Set dst (MulVL src (LoadVector mem)));
4547   format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %}
4548   ins_encode %{
4549     assert(UseAVX > 2, "required");
4550     int vector_len = vector_length_encoding(this);
4551     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4552   %}
4553   ins_pipe( pipe_slow );
4554 %}
4555 
4556 // Floats vector mul
4557 instruct vmulF(vec dst, vec src) %{
4558   predicate(UseAVX == 0);
4559   match(Set dst (MulVF dst src));
4560   format %{ "mulps   $dst,$src\t! mul packedF" %}
4561   ins_encode %{
4562     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
4563   %}
4564   ins_pipe( pipe_slow );
4565 %}
4566 
4567 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
4568   predicate(UseAVX > 0);
4569   match(Set dst (MulVF src1 src2));
4570   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
4571   ins_encode %{
4572     int vector_len = vector_length_encoding(this);
4573     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4574   %}
4575   ins_pipe( pipe_slow );
4576 %}
4577 
4578 instruct vmulF_mem(vec dst, vec src, memory mem) %{
4579   predicate(UseAVX > 0);
4580   match(Set dst (MulVF src (LoadVector mem)));
4581   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
4582   ins_encode %{
4583     int vector_len = vector_length_encoding(this);
4584     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4585   %}
4586   ins_pipe( pipe_slow );
4587 %}
4588 
4589 // Doubles vector mul
4590 instruct vmulD(vec dst, vec src) %{
4591   predicate(UseAVX == 0);
4592   match(Set dst (MulVD dst src));
4593   format %{ "mulpd   $dst,$src\t! mul packedD" %}
4594   ins_encode %{
4595     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
4596   %}
4597   ins_pipe( pipe_slow );
4598 %}
4599 
4600 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
4601   predicate(UseAVX > 0);
4602   match(Set dst (MulVD src1 src2));
4603   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
4604   ins_encode %{
4605     int vector_len = vector_length_encoding(this);
4606     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4607   %}
4608   ins_pipe( pipe_slow );
4609 %}
4610 
4611 instruct vmulD_mem(vec dst, vec src, memory mem) %{
4612   predicate(UseAVX > 0);
4613   match(Set dst (MulVD src (LoadVector mem)));
4614   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
4615   ins_encode %{
4616     int vector_len = vector_length_encoding(this);
4617     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4618   %}
4619   ins_pipe( pipe_slow );
4620 %}
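
     // Note: the vector conditional moves below are a compare-and-blend pair:
     // the packed compare (cmpps/cmppd) writes an all-ones or all-zeros mask
     // into each lane of $dst, and the variable blend (blendvps/blendvpd) then
     // selects each lane from one of the two sources according to that mask.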
4621 
4622 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
4623   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4624   match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
4625   effect(TEMP dst, USE src1, USE src2);
4626   format %{ "cmpps.$copnd  $dst, $src1, $src2  ! vcmovevf, cond=$cop\n\t"
4627             "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
4628          %}
4629   ins_encode %{
4630     int vector_len = Assembler::AVX_256bit;
4631     int cond = (Assembler::Condition)($copnd$$cmpcode);
4632     __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
4633     __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
4634   %}
4635   ins_pipe( pipe_slow );
4636 %}
4637 
4638 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
4639   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4640   match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
4641   effect(TEMP dst, USE src1, USE src2);
4642   format %{ "cmppd.$copnd  $dst, $src1, $src2  ! vcmovevd, cond=$cop\n\t"
4643             "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
4644          %}
4645   ins_encode %{
4646     int vector_len = Assembler::AVX_256bit;
4647     int cond = (Assembler::Condition)($copnd$$cmpcode);
4648     __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
4649     __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
4650   %}
4651   ins_pipe( pipe_slow );
4652 %}
4653 
4654 // --------------------------------- DIV --------------------------------------
4655 
4656 // Floats vector div
4657 instruct vdivF(vec dst, vec src) %{
4658   predicate(UseAVX == 0);
4659   match(Set dst (DivVF dst src));
4660   format %{ "divps   $dst,$src\t! div packedF" %}
4661   ins_encode %{
4662     __ divps($dst$$XMMRegister, $src$$XMMRegister);
4663   %}
4664   ins_pipe( pipe_slow );
4665 %}
4666 
4667 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
4668   predicate(UseAVX > 0);
4669   match(Set dst (DivVF src1 src2));
4670   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
4671   ins_encode %{
4672     int vector_len = vector_length_encoding(this);
4673     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4674   %}
4675   ins_pipe( pipe_slow );
4676 %}
4677 
4678 instruct vdivF_mem(vec dst, vec src, memory mem) %{
4679   predicate(UseAVX > 0);
4680   match(Set dst (DivVF src (LoadVector mem)));
4681   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
4682   ins_encode %{
4683     int vector_len = vector_length_encoding(this);
4684     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4685   %}
4686   ins_pipe( pipe_slow );
4687 %}
4688 
4689 // Doubles vector div
4690 instruct vdivD(vec dst, vec src) %{
4691   predicate(UseAVX == 0);
4692   match(Set dst (DivVD dst src));
4693   format %{ "divpd   $dst,$src\t! div packedD" %}
4694   ins_encode %{
4695     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
4696   %}
4697   ins_pipe( pipe_slow );
4698 %}
4699 
4700 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
4701   predicate(UseAVX > 0);
4702   match(Set dst (DivVD src1 src2));
4703   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
4704   ins_encode %{
4705     int vector_len = vector_length_encoding(this);
4706     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4707   %}
4708   ins_pipe( pipe_slow );
4709 %}
4710 
4711 instruct vdivD_mem(vec dst, vec src, memory mem) %{
4712   predicate(UseAVX > 0);
4713   match(Set dst (DivVD src (LoadVector mem)));
4714   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
4715   ins_encode %{
4716     int vector_len = vector_length_encoding(this);
4717     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4718   %}
4719   ins_pipe( pipe_slow );
4720 %}
4721 
4722 // --------------------------------- Sqrt --------------------------------------
4723 
4724 instruct vsqrtF_reg(vec dst, vec src) %{
4725   match(Set dst (SqrtVF src));
4726   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
4727   ins_encode %{
4728     assert(UseAVX > 0, "required");
4729     int vector_len = vector_length_encoding(this);
4730     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4731   %}
4732   ins_pipe( pipe_slow );
4733 %}
4734 
4735 instruct vsqrtF_mem(vec dst, memory mem) %{
4736   match(Set dst (SqrtVF (LoadVector mem)));
4737   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
4738   ins_encode %{
4739     assert(UseAVX > 0, "required");
4740     int vector_len = vector_length_encoding(this);
4741     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
4742   %}
4743   ins_pipe( pipe_slow );
4744 %}
4745 
4746 // Doubles vector sqrt
4747 instruct vsqrtD_reg(vec dst, vec src) %{
4748   match(Set dst (SqrtVD src));
4749   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
4750   ins_encode %{
4751     assert(UseAVX > 0, "required");
4752     int vector_len = vector_length_encoding(this);
4753     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4754   %}
4755   ins_pipe( pipe_slow );
4756 %}
4757 
4758 instruct vsqrtD_mem(vec dst, memory mem) %{
4759   match(Set dst (SqrtVD (LoadVector mem)));
4760   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
4761   ins_encode %{
4762     assert(UseAVX > 0, "required");
4763     int vector_len = vector_length_encoding(this);
4764     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
4765   %}
4766   ins_pipe( pipe_slow );
4767 %}
4768 
4769 // ------------------------------ Shift ---------------------------------------
4770 
4771 // Left and right shift count vectors are the same on x86
4772 // (only the lowest bits of the xmm register are used for the count).
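     // Both LShiftCntV and RShiftCntV therefore match the single movdl below:
     // the psll*/psrl*/psra* forms that take an XMM count read it from the low
     // quadword of that register, so one loader serves either direction.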
4773 instruct vshiftcnt(vec dst, rRegI cnt) %{
4774   match(Set dst (LShiftCntV cnt));
4775   match(Set dst (RShiftCntV cnt));
4776   format %{ "movdl    $dst,$cnt\t! load shift count" %}
4777   ins_encode %{
4778     __ movdl($dst$$XMMRegister, $cnt$$Register);
4779   %}
4780   ins_pipe( pipe_slow );
4781 %}
4782 
4783 // Byte vector shift
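     // Note: as with byte multiply, there is no byte-granular shift instruction,
     // so the byte lanes are widened to words (vextendbw), shifted as words,
     // masked back to byte range and repacked with packuswb/vpackuswb.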
4784 instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
4785   predicate(n->as_Vector()->length() <= 8);
4786   match(Set dst (LShiftVB src shift));
4787   match(Set dst (RShiftVB src shift));
4788   match(Set dst (URShiftVB src shift));
4789   effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
4790   format %{"vector_byte_shift $dst,$src,$shift" %}
4791   ins_encode %{
4792     assert(UseSSE > 3, "required");
4793     int opcode = this->ideal_Opcode();
4794     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
4795     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
4796     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4797     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
4798     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
4799   %}
4800   ins_pipe( pipe_slow );
4801 %}
4802 
4803 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
4804   predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
4805   match(Set dst (LShiftVB src shift));
4806   match(Set dst (RShiftVB src shift));
4807   match(Set dst (URShiftVB src shift));
4808   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
4809   format %{"vector_byte_shift $dst,$src,$shift" %}
4810   ins_encode %{
4811     assert(UseSSE > 3, "required");
4812     int opcode = this->ideal_Opcode();
4813 
4814     __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
4815     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
4816     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
4817     __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
4818     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
4819     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4820     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
4821     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
4822     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
4823   %}
4824   ins_pipe( pipe_slow );
4825 %}
4826 
4827 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
4828   predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
4829   match(Set dst (LShiftVB src shift));
4830   match(Set dst (RShiftVB src shift));
4831   match(Set dst (URShiftVB src shift));
4832   effect(TEMP dst, TEMP tmp, TEMP scratch);
4833   format %{"vector_byte_shift $dst,$src,$shift" %}
4834   ins_encode %{
4835     int opcode = this->ideal_Opcode();
4836     int vector_len = Assembler::AVX_256bit;
4837     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
4838     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
4839     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
4840     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
4841     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
4842   %}
4843   ins_pipe( pipe_slow );
4844 %}
4845 
4846 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
4847   predicate(n->as_Vector()->length() == 32);
4848   match(Set dst (LShiftVB src shift));
4849   match(Set dst (RShiftVB src shift));
4850   match(Set dst (URShiftVB src shift));
4851   effect(TEMP dst, TEMP tmp, TEMP scratch);
4852   format %{"vector_byte_shift $dst,$src,$shift" %}
4853   ins_encode %{
4854     assert(UseAVX > 1, "required");
4855     int opcode = this->ideal_Opcode();
4856     int vector_len = Assembler::AVX_256bit;
4857     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
4858     __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
4859     __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
4860     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
4861     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
4862     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
4863     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
4864     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
4865     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
4866   %}
4867   ins_pipe( pipe_slow );
4868 %}
4869 
4870 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
4871   predicate(n->as_Vector()->length() == 64);
4872   match(Set dst (LShiftVB src shift));
4873   match(Set dst (RShiftVB src shift));
4874   match(Set dst (URShiftVB src shift));
4875   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
4876   format %{"vector_byte_shift $dst,$src,$shift" %}
4877   ins_encode %{
4878     assert(UseAVX > 2, "required");
4879     int opcode = this->ideal_Opcode();
4880     int vector_len = Assembler::AVX_512bit;
4881     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
4882     __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
4883     __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
4884     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
4885     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
4886     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4887     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4888     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4889     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4890     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4891     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
4892     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4893   %}
4894   ins_pipe( pipe_slow );
4895 %}
4896 
4897 // Shorts vector logical right shift produces an incorrect Java result
4898 // for negative data because Java code converts short values to int with
4899 // sign extension before the shift. But char vectors are fine since chars are
4900 // unsigned values.
4901 // Shorts/Chars vector shift
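     // Illustration (Java semantics, not generated code): for short s = (short)-4,
     // "s >>>= 1" promotes s to the int -4 (0xFFFFFFFC), shifts it to 0x7FFFFFFE
     // and narrows back to (short)0xFFFE == -2, whereas a 16-bit vector logical
     // shift of 0xFFFC by 1 gives 0x7FFE == 32766. That mismatch is why the note
     // above singles out negative short data.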
4902 instruct vshiftS(vec dst, vec src, vec shift) %{
4903   match(Set dst (LShiftVS src shift));
4904   match(Set dst (RShiftVS src shift));
4905   match(Set dst (URShiftVS src shift));
4906   effect(TEMP dst, USE src, USE shift);
4907   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
4908   ins_encode %{
4909     int opcode = this->ideal_Opcode();
4910     if (UseAVX > 0) {
4911       int vlen_enc = vector_length_encoding(this);
4912       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
4913     } else {
4914       int vlen = vector_length(this);
4915       if (vlen == 2) {
4916         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
4917         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4918       } else if (vlen == 4) {
4919         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
4920         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4921       } else {
4922         assert (vlen == 8, "sanity");
4923         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
4924         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4925       }
4926     }
4927   %}
4928   ins_pipe( pipe_slow );
4929 %}
4930 
4931 // Integers vector shift
4932 instruct vshiftI(vec dst, vec src, vec shift) %{
4933   match(Set dst (LShiftVI src shift));
4934   match(Set dst (RShiftVI src shift));
4935   match(Set dst (URShiftVI src shift));
4936   effect(TEMP dst, USE src, USE shift);
4937   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
4938   ins_encode %{
4939     int opcode = this->ideal_Opcode();
4940     if (UseAVX > 0) {
4941       int vector_len = vector_length_encoding(this);
4942       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
4943     } else {
4944       int vlen = vector_length(this);
4945       if (vlen == 2) {
4946         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
4947         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4948       } else {
4949         assert(vlen == 4, "sanity");
4950         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
4951         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4952       }
4953     }
4954   %}
4955   ins_pipe( pipe_slow );
4956 %}
4957 
4958 // Longs vector shift
4959 instruct vshiftL(vec dst, vec src, vec shift) %{
4960   match(Set dst (LShiftVL src shift));
4961   match(Set dst (URShiftVL src shift));
4962   effect(TEMP dst, USE src, USE shift);
4963   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
4964   ins_encode %{
4965     int opcode = this->ideal_Opcode();
4966     if (UseAVX > 0) {
4967       int vector_len = vector_length_encoding(this);
4968       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
4969     } else {
4970       assert(vector_length(this) == 2, "");
4971       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
4972       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4973     }
4974   %}
4975   ins_pipe( pipe_slow );
4976 %}
4977 
4978 // -------------------ArithmeticRightShift -----------------------------------
4979 // Long vector arithmetic right shift
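     // Before AVX-512 there is no packed 64-bit arithmetic right shift (vpsraq is
     // EVEX-encoded only), so the first pattern emulates it from the logical shift:
     //   m = sign_mask >>> n;   dst = ((src >>> n) ^ m) - m
     // The xor/subtract pair sign-fills the vacated high bits of negative lanes
     // and is a no-op for non-negative lanes. Worked with 8-bit lanes for brevity:
     // src = 0xF0 (-16), n = 2:  (0x3C ^ 0x20) - 0x20 = 0xFC (-4).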
4980 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
4981   predicate(UseAVX <= 2);
4982   match(Set dst (RShiftVL src shift));
4983   effect(TEMP dst, TEMP tmp, TEMP scratch);
4984   format %{ "vshiftq $dst,$src,$shift" %}
4985   ins_encode %{
4986     uint vlen = vector_length(this);
4987     if (vlen == 2) {
4988       assert(UseSSE >= 2, "required");
4989       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
4990       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
4991       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
4992       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
4993       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
4994       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
4995     } else {
4996       assert(vlen == 4, "sanity");
4997       assert(UseAVX > 1, "required");
4998       int vector_len = Assembler::AVX_256bit;
4999       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
5000       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
5001       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
5002       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
5003       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
5004     }
5005   %}
5006   ins_pipe( pipe_slow );
5007 %}
5008 
5009 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
5010   predicate(UseAVX > 2);
5011   match(Set dst (RShiftVL src shift));
5012   format %{ "vshiftq $dst,$src,$shift" %}
5013   ins_encode %{
5014     int vector_len = vector_length_encoding(this);
5015     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
5016   %}
5017   ins_pipe( pipe_slow );
5018 %}
5019 
5020 // --------------------------------- AND --------------------------------------
5021 
5022 instruct vand(vec dst, vec src) %{
5023   predicate(UseAVX == 0);
5024   match(Set dst (AndV dst src));
5025   format %{ "pand    $dst,$src\t! and vectors" %}
5026   ins_encode %{
5027     __ pand($dst$$XMMRegister, $src$$XMMRegister);
5028   %}
5029   ins_pipe( pipe_slow );
5030 %}
5031 
5032 instruct vand_reg(vec dst, vec src1, vec src2) %{
5033   predicate(UseAVX > 0);
5034   match(Set dst (AndV src1 src2));
5035   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
5036   ins_encode %{
5037     int vector_len = vector_length_encoding(this);
5038     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5039   %}
5040   ins_pipe( pipe_slow );
5041 %}
5042 
5043 instruct vand_mem(vec dst, vec src, memory mem) %{
5044   predicate(UseAVX > 0);
5045   match(Set dst (AndV src (LoadVector mem)));
5046   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
5047   ins_encode %{
5048     int vector_len = vector_length_encoding(this);
5049     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5050   %}
5051   ins_pipe( pipe_slow );
5052 %}
5053 
5054 // --------------------------------- OR ---------------------------------------
5055 
5056 instruct vor(vec dst, vec src) %{
5057   predicate(UseAVX == 0);
5058   match(Set dst (OrV dst src));
5059   format %{ "por     $dst,$src\t! or vectors" %}
5060   ins_encode %{
5061     __ por($dst$$XMMRegister, $src$$XMMRegister);
5062   %}
5063   ins_pipe( pipe_slow );
5064 %}
5065 
5066 instruct vor_reg(vec dst, vec src1, vec src2) %{
5067   predicate(UseAVX > 0);
5068   match(Set dst (OrV src1 src2));
5069   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
5070   ins_encode %{
5071     int vector_len = vector_length_encoding(this);
5072     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5073   %}
5074   ins_pipe( pipe_slow );
5075 %}
5076 
5077 instruct vor_mem(vec dst, vec src, memory mem) %{
5078   predicate(UseAVX > 0);
5079   match(Set dst (OrV src (LoadVector mem)));
5080   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
5081   ins_encode %{
5082     int vector_len = vector_length_encoding(this);
5083     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5084   %}
5085   ins_pipe( pipe_slow );
5086 %}
5087 
5088 // --------------------------------- XOR --------------------------------------
5089 
5090 instruct vxor(vec dst, vec src) %{
5091   predicate(UseAVX == 0);
5092   match(Set dst (XorV dst src));
5093   format %{ "pxor    $dst,$src\t! xor vectors" %}
5094   ins_encode %{
5095     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5096   %}
5097   ins_pipe( pipe_slow );
5098 %}
5099 
5100 instruct vxor_reg(vec dst, vec src1, vec src2) %{
5101   predicate(UseAVX > 0);
5102   match(Set dst (XorV src1 src2));
5103   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
5104   ins_encode %{
5105     int vector_len = vector_length_encoding(this);
5106     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5107   %}
5108   ins_pipe( pipe_slow );
5109 %}
5110 
5111 instruct vxor_mem(vec dst, vec src, memory mem) %{
5112   predicate(UseAVX > 0);
5113   match(Set dst (XorV src (LoadVector mem)));
5114   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
5115   ins_encode %{
5116     int vector_len = vector_length_encoding(this);
5117     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5118   %}
5119   ins_pipe( pipe_slow );
5120 %}
5121 
5122 // --------------------------------- ABS --------------------------------------
5123 // a = |a|
5124 instruct vabsB_reg(vec dst, vec src) %{
5125   match(Set dst (AbsVB  src));
5126   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
5127   ins_encode %{
5128     uint vlen = vector_length(this);
5129     if (vlen <= 16) {
5130       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
5131     } else {
5132       int vlen_enc = vector_length_encoding(this);
5133       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
5134     }
5135   %}
5136   ins_pipe( pipe_slow );
5137 %}
5138 
5139 instruct vabsS_reg(vec dst, vec src) %{
5140   match(Set dst (AbsVS  src));
5141   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
5142   ins_encode %{
5143     uint vlen = vector_length(this);
5144     if (vlen <= 8) {
5145       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
5146     } else {
5147       int vlen_enc = vector_length_encoding(this);
5148       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
5149     }
5150   %}
5151   ins_pipe( pipe_slow );
5152 %}
5153 
5154 instruct vabsI_reg(vec dst, vec src) %{
5155   match(Set dst (AbsVI  src));
5156   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
5157   ins_encode %{
5158     uint vlen = vector_length(this);
5159     if (vlen <= 4) {
5160       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
5161     } else {
5162       int vlen_enc = vector_length_encoding(this);
5163       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
5164     }
5165   %}
5166   ins_pipe( pipe_slow );
5167 %}
5168 
5169 instruct vabsL_reg(vec dst, vec src) %{
5170   match(Set dst (AbsVL  src));
5171   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
5172   ins_encode %{
5173     assert(UseAVX > 2, "required");
5174     int vector_len = vector_length_encoding(this);
5175     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
5176   %}
5177   ins_pipe( pipe_slow );
5178 %}
5179 
5180 // --------------------------------- ABSNEG --------------------------------------
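     // Abs and Neg of floating point values are pure sign-bit operations: abs
     // clears the sign bit (AND with a sign mask), neg flips it (XOR with a
     // sign-flip mask). The mask constant lives in memory, hence the "[mask]"
     // operand in the formats and the scratch register reserved for its address.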
5181 
5182 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
5183   predicate(n->as_Vector()->length() != 4); // handled by 1-operand instruction vabsneg4F
5184   match(Set dst (AbsVF src));
5185   match(Set dst (NegVF src));
5186   effect(TEMP scratch);
5187   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
5188   ins_cost(150);
5189   ins_encode %{
5190     int opcode = this->ideal_Opcode();
5191     int vlen = vector_length(this);
5192     if (vlen == 2) {
5193       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
5194     } else {
5195       assert(vlen == 8 || vlen == 16, "required");
5196       int vlen_enc = vector_length_encoding(this);
5197       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
5198     }
5199   %}
5200   ins_pipe( pipe_slow );
5201 %}
5202 
5203 instruct vabsneg4F(vec dst, rRegI scratch) %{
5204   predicate(n->as_Vector()->length() == 4);
5205   match(Set dst (AbsVF dst));
5206   match(Set dst (NegVF dst));
5207   effect(TEMP scratch);
5208   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
5209   ins_cost(150);
5210   ins_encode %{
5211     int opcode = this->ideal_Opcode();
5212     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register);
5213   %}
5214   ins_pipe( pipe_slow );
5215 %}
5216 
5217 instruct vabsnegD(vec dst, vec src, rRegI scratch) %{
5218   match(Set dst (AbsVD  src));
5219   match(Set dst (NegVD  src));
5220   effect(TEMP scratch);
5221   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
5222   ins_encode %{
5223     int opcode = this->ideal_Opcode();
5224     uint vlen = vector_length(this);
5225     if (vlen == 2) {
5226       assert(UseSSE >= 2, "required");
5227       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
5228     } else {
5229       int vlen_enc = vector_length_encoding(this);
5230       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
5231     }
5232   %}
5233   ins_pipe( pipe_slow );
5234 %}
5235 
5236 // --------------------------------- FMA --------------------------------------
5237 // a * b + c
5238 
5239 instruct vfmaF_reg(vec a, vec b, vec c) %{
5240   match(Set c (FmaVF  c (Binary a b)));
5241   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
5242   ins_cost(150);
5243   ins_encode %{
5244     assert(UseFMA, "not enabled");
5245     int vector_len = vector_length_encoding(this);
5246     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
5247   %}
5248   ins_pipe( pipe_slow );
5249 %}
5250 
5251 instruct vfmaF_mem(vec a, memory b, vec c) %{
5252   match(Set c (FmaVF  c (Binary a (LoadVector b))));
5253   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
5254   ins_cost(150);
5255   ins_encode %{
5256     assert(UseFMA, "not enabled");
5257     int vector_len = vector_length_encoding(this);
5258     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
5259   %}
5260   ins_pipe( pipe_slow );
5261 %}
5262 
5263 instruct vfmaD_reg(vec a, vec b, vec c) %{
5264   match(Set c (FmaVD  c (Binary a b)));
5265   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
5266   ins_cost(150);
5267   ins_encode %{
5268     assert(UseFMA, "not enabled");
5269     int vector_len = vector_length_encoding(this);
5270     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
5271   %}
5272   ins_pipe( pipe_slow );
5273 %}
5274 
5275 instruct vfmaD_mem(vec a, memory b, vec c) %{
5276   match(Set c (FmaVD  c (Binary a (LoadVector b))));
5277   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
5278   ins_cost(150);
5279   ins_encode %{
5280     assert(UseFMA, "not enabled");
5281     int vector_len = vector_length_encoding(this);
5282     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
5283   %}
5284   ins_pipe( pipe_slow );
5285 %}
5286 
5287 // --------------------------------- Vector Multiply Add --------------------------------------
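     // pmaddwd/vpmaddwd multiplies corresponding signed 16-bit elements and adds
     // each adjacent pair of 32-bit products (two shorts in, one int out per
     // pair), which is exactly the MulAddVS2VI node matched here.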
5288 
5289 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
5290   predicate(UseAVX == 0);
5291   match(Set dst (MulAddVS2VI dst src1));
5292   format %{ "pmaddwd $dst,$dst,$src1\t! muladd packedStoI" %}
5293   ins_encode %{
5294     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
5295   %}
5296   ins_pipe( pipe_slow );
5297 %}
5298 
5299 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
5300   predicate(UseAVX > 0);
5301   match(Set dst (MulAddVS2VI src1 src2));
5302   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
5303   ins_encode %{
5304     int vector_len = vector_length_encoding(this);
5305     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5306   %}
5307   ins_pipe( pipe_slow );
5308 %}
5309 
5310 // --------------------------------- Vector Multiply Add Add ----------------------------------
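     // With AVX512_VNNI, evpdpwssd performs the word multiply, the pairwise add
     // and the accumulation into $dst in a single instruction, so the combined
     // (AddVI (MulAddVS2VI ...) dst) shape is matched directly; the low ins_cost
     // biases selection toward this form over the two-instruction sequence.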
5311 
5312 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
5313   predicate(VM_Version::supports_avx512_vnni());
5314   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
5315   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
5316   ins_encode %{
5317     assert(UseAVX > 2, "required");
5318     int vector_len = vector_length_encoding(this);
5319     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5320   %}
5321   ins_pipe( pipe_slow );
5322   ins_cost(10);
5323 %}
5324 
5325 // --------------------------------- PopCount --------------------------------------
5326 
5327 instruct vpopcountI(vec dst, vec src) %{
5328   match(Set dst (PopCountVI src));
5329   format %{ "vpopcntd  $dst,$src\t! vector popcount packedI" %}
5330   ins_encode %{
5331     assert(UsePopCountInstruction, "not enabled");
5332 
5333     int vector_len = vector_length_encoding(this);
5334     __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
5335   %}
5336   ins_pipe( pipe_slow );
5337 %}
5338 
5339 // --------------------------------- Bitwise Ternary Logic ----------------------------------
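     // vpternlogd evaluates an arbitrary three-input boolean function bit-wise:
     // the three source bits form a 3-bit index into the 8-bit immediate $func,
     // and the addressed bit of $func is the result (e.g. func == 0x96 computes
     // dst ^ src2 ^ src3).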
5340 
5341 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
5342   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
5343   effect(TEMP dst);
5344   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
5345   ins_encode %{
5346     int vector_len = vector_length_encoding(this);
5347     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
5348   %}
5349   ins_pipe( pipe_slow );
5350 %}
5351 
5352 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
5353   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
5354   effect(TEMP dst);
5355   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
5356   ins_encode %{
5357     int vector_len = vector_length_encoding(this);
5358     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
5359   %}
5360   ins_pipe( pipe_slow );
5361 %}