1 //
   2 // Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Common Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // archtecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // XMM registers.  512-bit registers or 8 words each, labeled (a)-p.
  63 // Word a in each register holds a Float, words ab hold a Double.
  64 // The whole registers are used in SSE4.2 version intrinsics,
  65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  66 // UseXMMForArrayCopy and UseSuperword flags).
  67 // For pre EVEX enabled architectures:
  68 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  69 // For EVEX enabled architectures:
  70 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  71 //
  72 // Linux ABI:   No register preserved across function calls
  73 //              XMM0-XMM7 might hold parameters
  74 // Windows ABI: XMM6-XMM31 preserved across function calls
  75 //              XMM0-XMM3 might hold parameters
  76 
  77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  93 
  94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
 110 
 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
 127 
 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
 144 
 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
 161 
 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
 178 
 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 183 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 195 
 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 212 
 213 #ifdef _LP64
 214 
 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 231 
 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 239 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 248 
 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 265 
 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 282 
 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 299 
 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 316 
 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 333 
 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 344 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 350 
 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 367 
 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 384 
 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 401 
 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 418 
 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 435 
 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 452 
 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 469 
 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 486 
 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 503 
 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 520 
 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 537 
 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 554 
 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
 571 
 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
 588 
 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
 605 
 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
 622 
 623 #endif // _LP64
 624 
 625 #ifdef _LP64
 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 627 #else
 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
 629 #endif // _LP64
 630 
 631 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 632                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 633                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 634                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 635                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 636                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 637                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 638                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
 639 #ifdef _LP64
 640                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 641                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 642                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 643                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 644                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 645                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 646                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 647                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
 648                   ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 649                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 650                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 651                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 652                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 653                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 654                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 655                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 656                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 657                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 658                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 659                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 660                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 661                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 662                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 663                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
 664 #endif
 665                       );
 666 
 667 // flags allocation class should be last.
 668 alloc_class chunk2(RFLAGS);
 669 
 670 // Singleton class for condition codes
 671 reg_class int_flags(RFLAGS);
 672 
 673 // Class for pre evex float registers
 674 reg_class float_reg_legacy(XMM0,
 675                     XMM1,
 676                     XMM2,
 677                     XMM3,
 678                     XMM4,
 679                     XMM5,
 680                     XMM6,
 681                     XMM7
 682 #ifdef _LP64
 683                    ,XMM8,
 684                     XMM9,
 685                     XMM10,
 686                     XMM11,
 687                     XMM12,
 688                     XMM13,
 689                     XMM14,
 690                     XMM15
 691 #endif
 692                     );
 693 
 694 // Class for evex float registers
 695 reg_class float_reg_evex(XMM0,
 696                     XMM1,
 697                     XMM2,
 698                     XMM3,
 699                     XMM4,
 700                     XMM5,
 701                     XMM6,
 702                     XMM7
 703 #ifdef _LP64
 704                    ,XMM8,
 705                     XMM9,
 706                     XMM10,
 707                     XMM11,
 708                     XMM12,
 709                     XMM13,
 710                     XMM14,
 711                     XMM15,
 712                     XMM16,
 713                     XMM17,
 714                     XMM18,
 715                     XMM19,
 716                     XMM20,
 717                     XMM21,
 718                     XMM22,
 719                     XMM23,
 720                     XMM24,
 721                     XMM25,
 722                     XMM26,
 723                     XMM27,
 724                     XMM28,
 725                     XMM29,
 726                     XMM30,
 727                     XMM31
 728 #endif
 729                     );
 730 
 731 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 732 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 733 
 734 // Class for pre evex double registers
 735 reg_class double_reg_legacy(XMM0,  XMM0b,
 736                      XMM1,  XMM1b,
 737                      XMM2,  XMM2b,
 738                      XMM3,  XMM3b,
 739                      XMM4,  XMM4b,
 740                      XMM5,  XMM5b,
 741                      XMM6,  XMM6b,
 742                      XMM7,  XMM7b
 743 #ifdef _LP64
 744                     ,XMM8,  XMM8b,
 745                      XMM9,  XMM9b,
 746                      XMM10, XMM10b,
 747                      XMM11, XMM11b,
 748                      XMM12, XMM12b,
 749                      XMM13, XMM13b,
 750                      XMM14, XMM14b,
 751                      XMM15, XMM15b
 752 #endif
 753                      );
 754 
 755 // Class for evex double registers
 756 reg_class double_reg_evex(XMM0,  XMM0b,
 757                      XMM1,  XMM1b,
 758                      XMM2,  XMM2b,
 759                      XMM3,  XMM3b,
 760                      XMM4,  XMM4b,
 761                      XMM5,  XMM5b,
 762                      XMM6,  XMM6b,
 763                      XMM7,  XMM7b
 764 #ifdef _LP64
 765                     ,XMM8,  XMM8b,
 766                      XMM9,  XMM9b,
 767                      XMM10, XMM10b,
 768                      XMM11, XMM11b,
 769                      XMM12, XMM12b,
 770                      XMM13, XMM13b,
 771                      XMM14, XMM14b,
 772                      XMM15, XMM15b,
 773                      XMM16, XMM16b,
 774                      XMM17, XMM17b,
 775                      XMM18, XMM18b,
 776                      XMM19, XMM19b,
 777                      XMM20, XMM20b,
 778                      XMM21, XMM21b,
 779                      XMM22, XMM22b,
 780                      XMM23, XMM23b,
 781                      XMM24, XMM24b,
 782                      XMM25, XMM25b,
 783                      XMM26, XMM26b,
 784                      XMM27, XMM27b,
 785                      XMM28, XMM28b,
 786                      XMM29, XMM29b,
 787                      XMM30, XMM30b,
 788                      XMM31, XMM31b
 789 #endif
 790                      );
 791 
 792 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 793 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 794 
 795 // Class for pre evex 32bit vector registers
 796 reg_class vectors_reg_legacy(XMM0,
 797                       XMM1,
 798                       XMM2,
 799                       XMM3,
 800                       XMM4,
 801                       XMM5,
 802                       XMM6,
 803                       XMM7
 804 #ifdef _LP64
 805                      ,XMM8,
 806                       XMM9,
 807                       XMM10,
 808                       XMM11,
 809                       XMM12,
 810                       XMM13,
 811                       XMM14,
 812                       XMM15
 813 #endif
 814                       );
 815 
 816 // Class for evex 32bit vector registers
 817 reg_class vectors_reg_evex(XMM0,
 818                       XMM1,
 819                       XMM2,
 820                       XMM3,
 821                       XMM4,
 822                       XMM5,
 823                       XMM6,
 824                       XMM7
 825 #ifdef _LP64
 826                      ,XMM8,
 827                       XMM9,
 828                       XMM10,
 829                       XMM11,
 830                       XMM12,
 831                       XMM13,
 832                       XMM14,
 833                       XMM15,
 834                       XMM16,
 835                       XMM17,
 836                       XMM18,
 837                       XMM19,
 838                       XMM20,
 839                       XMM21,
 840                       XMM22,
 841                       XMM23,
 842                       XMM24,
 843                       XMM25,
 844                       XMM26,
 845                       XMM27,
 846                       XMM28,
 847                       XMM29,
 848                       XMM30,
 849                       XMM31
 850 #endif
 851                       );
 852 
 853 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 854 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 855 
 856 // Class for all 64bit vector registers
 857 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 858                       XMM1,  XMM1b,
 859                       XMM2,  XMM2b,
 860                       XMM3,  XMM3b,
 861                       XMM4,  XMM4b,
 862                       XMM5,  XMM5b,
 863                       XMM6,  XMM6b,
 864                       XMM7,  XMM7b
 865 #ifdef _LP64
 866                      ,XMM8,  XMM8b,
 867                       XMM9,  XMM9b,
 868                       XMM10, XMM10b,
 869                       XMM11, XMM11b,
 870                       XMM12, XMM12b,
 871                       XMM13, XMM13b,
 872                       XMM14, XMM14b,
 873                       XMM15, XMM15b
 874 #endif
 875                       );
 876 
 877 // Class for all 64bit vector registers
 878 reg_class vectord_reg_evex(XMM0,  XMM0b,
 879                       XMM1,  XMM1b,
 880                       XMM2,  XMM2b,
 881                       XMM3,  XMM3b,
 882                       XMM4,  XMM4b,
 883                       XMM5,  XMM5b,
 884                       XMM6,  XMM6b,
 885                       XMM7,  XMM7b
 886 #ifdef _LP64
 887                      ,XMM8,  XMM8b,
 888                       XMM9,  XMM9b,
 889                       XMM10, XMM10b,
 890                       XMM11, XMM11b,
 891                       XMM12, XMM12b,
 892                       XMM13, XMM13b,
 893                       XMM14, XMM14b,
 894                       XMM15, XMM15b,
 895                       XMM16, XMM16b,
 896                       XMM17, XMM17b,
 897                       XMM18, XMM18b,
 898                       XMM19, XMM19b,
 899                       XMM20, XMM20b,
 900                       XMM21, XMM21b,
 901                       XMM22, XMM22b,
 902                       XMM23, XMM23b,
 903                       XMM24, XMM24b,
 904                       XMM25, XMM25b,
 905                       XMM26, XMM26b,
 906                       XMM27, XMM27b,
 907                       XMM28, XMM28b,
 908                       XMM29, XMM29b,
 909                       XMM30, XMM30b,
 910                       XMM31, XMM31b
 911 #endif
 912                       );
 913 
 914 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 915 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 916 
 917 // Class for all 128bit vector registers
 918 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 919                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 920                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 921                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 922                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 923                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 924                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 925                       XMM7,  XMM7b,  XMM7c,  XMM7d
 926 #ifdef _LP64
 927                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
 928                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 929                       XMM10, XMM10b, XMM10c, XMM10d,
 930                       XMM11, XMM11b, XMM11c, XMM11d,
 931                       XMM12, XMM12b, XMM12c, XMM12d,
 932                       XMM13, XMM13b, XMM13c, XMM13d,
 933                       XMM14, XMM14b, XMM14c, XMM14d,
 934                       XMM15, XMM15b, XMM15c, XMM15d
 935 #endif
 936                       );
 937 
 938 // Class for all 128bit vector registers
 939 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 940                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 941                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 942                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 943                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 944                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 945                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 946                       XMM7,  XMM7b,  XMM7c,  XMM7d
 947 #ifdef _LP64
 948                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
 949                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 950                       XMM10, XMM10b, XMM10c, XMM10d,
 951                       XMM11, XMM11b, XMM11c, XMM11d,
 952                       XMM12, XMM12b, XMM12c, XMM12d,
 953                       XMM13, XMM13b, XMM13c, XMM13d,
 954                       XMM14, XMM14b, XMM14c, XMM14d,
 955                       XMM15, XMM15b, XMM15c, XMM15d,
 956                       XMM16, XMM16b, XMM16c, XMM16d,
 957                       XMM17, XMM17b, XMM17c, XMM17d,
 958                       XMM18, XMM18b, XMM18c, XMM18d,
 959                       XMM19, XMM19b, XMM19c, XMM19d,
 960                       XMM20, XMM20b, XMM20c, XMM20d,
 961                       XMM21, XMM21b, XMM21c, XMM21d,
 962                       XMM22, XMM22b, XMM22c, XMM22d,
 963                       XMM23, XMM23b, XMM23c, XMM23d,
 964                       XMM24, XMM24b, XMM24c, XMM24d,
 965                       XMM25, XMM25b, XMM25c, XMM25d,
 966                       XMM26, XMM26b, XMM26c, XMM26d,
 967                       XMM27, XMM27b, XMM27c, XMM27d,
 968                       XMM28, XMM28b, XMM28c, XMM28d,
 969                       XMM29, XMM29b, XMM29c, XMM29d,
 970                       XMM30, XMM30b, XMM30c, XMM30d,
 971                       XMM31, XMM31b, XMM31c, XMM31d
 972 #endif
 973                       );
 974 
 975 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 976 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 977 
 978 // Class for all 256bit vector registers
 979 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 980                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 981                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 982                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 983                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 984                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 985                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 986                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
 987 #ifdef _LP64
 988                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 989                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 990                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 991                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 992                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 993                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 994                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 995                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
 996 #endif
 997                       );
 998 
 999 // Class for all 256bit vector registers
1000 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1001                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1002                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1003                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1004                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1005                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1006                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1007                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1008 #ifdef _LP64
1009                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1010                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1011                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1012                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1013                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1014                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1015                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1016                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1017                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1018                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1019                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1020                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1021                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1022                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1023                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1024                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1025                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1026                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1027                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1028                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1029                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1030                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1031                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1032                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
1033 #endif
1034                       );
1035 
1036 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1037 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1038 
1039 // Class for all 512bit vector registers
1040 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1041                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1042                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1043                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1044                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1045                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1046                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1047                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1048 #ifdef _LP64
1049                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1050                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1051                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1052                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1053                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1054                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1055                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1056                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1057                      ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1058                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1059                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1060                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1061                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1062                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1063                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1064                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1065                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1066                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1067                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1068                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1069                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1070                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1071                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1072                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1073 #endif
1074                       );
1075 
1076 // Class for restricted 512bit vector registers
1077 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1078                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1079                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1080                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1081                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1082                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1083                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1084                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1085 #ifdef _LP64
1086                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1087                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1088                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1089                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1090                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1091                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1092                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1093                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1094 #endif
1095                       );
1096 
1097 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1099 
1100 %}
1101 
1102 
1103 //----------SOURCE BLOCK-------------------------------------------------------
1104 // This is a block of C++ code which provides values, functions, and
1105 // definitions necessary in the rest of the architecture description
1106 
1107 source_hpp %{
1108 // Header information of the source block.
1109 // Method declarations/definitions which are used outside
1110 // the ad-scope can conveniently be defined here.
1111 //
1112 // To keep related declarations/definitions/uses close together,
1113 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
1114 
1115 class NativeJump;
1116 
1117 class CallStubImpl {
1118 
1119   //--------------------------------------------------------------
1120   //---<  Used for optimization in Compile::shorten_branches  >---
1121   //--------------------------------------------------------------
1122 
1123  public:
1124   // Size of call trampoline stub.
1125   static uint size_call_trampoline() {
1126     return 0; // no call trampolines on this platform
1127   }
1128 
1129   // number of relocations needed by a call trampoline stub
1130   static uint reloc_call_trampoline() {
1131     return 0; // no call trampolines on this platform
1132   }
1133 };
1134 
1135 class HandlerImpl {
1136 
1137  public:
1138 
1139   static int emit_exception_handler(CodeBuffer &cbuf);
1140   static int emit_deopt_handler(CodeBuffer& cbuf);
1141 
1142   static uint size_exception_handler() {
1143     // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1146     // Note that this value is also credited (in output.cpp) to
1147     // the size of the code section.
1148     return NativeJump::instruction_size;
1149   }
1150 
1151 #ifdef _LP64
1152   static uint size_deopt_handler() {
    // three 5-byte instructions plus one move for an unreachable address.
1154     return 15+3;
1155   }
1156 #else
1157   static uint size_deopt_handler() {
1158     // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
1161     // Note that this value is also credited (in output.cpp) to
1162     // the size of the code section.
1163     return 5 + NativeJump::instruction_size; // pushl(); jmp;
1164   }
1165 #endif
1166 };
1167 
1168 class Node::PD {
1169 public:
1170   enum NodeFlags {
1171     Flag_intel_jcc_erratum = Node::_last_flag << 1,
1172     _last_flag             = Flag_intel_jcc_erratum
1173   };
1174 };
1175 
1176 %} // end source_hpp
1177 
1178 source %{
1179 
1180 #include "opto/addnode.hpp"
1181 #include "c2_intelJccErratum_x86.hpp"
1182 
1183 void PhaseOutput::pd_perform_mach_node_analysis() {
1184   if (VM_Version::has_intel_jcc_erratum()) {
1185     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
1186     _buf_sizes._code += extra_padding;
1187   }
1188 }
1189 
1190 int MachNode::pd_alignment_required() const {
1191   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
1192     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
1193     return IntelJccErratum::largest_jcc_size() + 1;
1194   } else {
1195     return 1;
1196   }
1197 }
1198 
1199 int MachNode::compute_padding(int current_offset) const {
1200   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
1201     Compile* C = Compile::current();
1202     PhaseOutput* output = C->output();
1203     Block* block = output->block();
1204     int index = output->index();
1205     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
1206   } else {
1207     return 0;
1208   }
1209 }
1210 
1211 // Emit exception handler code.
1212 // Stuff framesize into a register and call a VM stub routine.
1213 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1214 
1215   // Note that the code buffer's insts_mark is always relative to insts.
1216   // That's why we must use the macroassembler to generate a handler.
1217   C2_MacroAssembler _masm(&cbuf);
1218   address base = __ start_a_stub(size_exception_handler());
1219   if (base == NULL) {
1220     ciEnv::current()->record_failure("CodeCache is full");
1221     return 0;  // CodeBuffer::expand failed
1222   }
1223   int offset = __ offset();
1224   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1225   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1226   __ end_a_stub();
1227   return offset;
1228 }
1229 
1230 // Emit deopt handler code.
1231 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1232 
1233   // Note that the code buffer's insts_mark is always relative to insts.
1234   // That's why we must use the macroassembler to generate a handler.
1235   C2_MacroAssembler _masm(&cbuf);
1236   address base = __ start_a_stub(size_deopt_handler());
1237   if (base == NULL) {
1238     ciEnv::current()->record_failure("CodeCache is full");
1239     return 0;  // CodeBuffer::expand failed
1240   }
1241   int offset = __ offset();
1242 
1243 #ifdef _LP64
1244   address the_pc = (address) __ pc();
1245   Label next;
  // push "the_pc" on the stack without destroying any registers
1247   // as they all may be live.
1248 
1249   // push address of "next"
1250   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1251   __ bind(next);
1252   // adjust it so it matches "the_pc"
1253   __ subptr(Address(rsp, 0), __ offset() - offset);
1254 #else
1255   InternalAddress here(__ pc());
1256   __ pushptr(here.addr());
1257 #endif
1258 
1259   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1260   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
1261   __ end_a_stub();
1262   return offset;
1263 }
1264 
1265 
1266 //=============================================================================
1267 
1268   // Float masks come from different places depending on platform.
1269 #ifdef _LP64
1270   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
1271   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
1272   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1273   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1274 #else
1275   static address float_signmask()  { return (address)float_signmask_pool; }
1276   static address float_signflip()  { return (address)float_signflip_pool; }
1277   static address double_signmask() { return (address)double_signmask_pool; }
1278   static address double_signflip() { return (address)double_signflip_pool; }
1279 #endif
1280   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
1281   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
1282   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
1283 
1284 //=============================================================================
1285 const bool Matcher::match_rule_supported(int opcode) {
1286   if (!has_match_rule(opcode)) {
1287     return false; // no match rule present
1288   }
1289   switch (opcode) {
1290     case Op_AbsVL:
1291       if (UseAVX < 3) {
1292         return false;
1293       }
1294       break;
1295     case Op_PopCountI:
1296     case Op_PopCountL:
1297       if (!UsePopCountInstruction) {
1298         return false;
1299       }
1300       break;
1301     case Op_PopCountVI:
1302       if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) {
1303         return false;
1304       }
1305       break;
1306     case Op_MulVI:
1307       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
1308         return false;
1309       }
1310       break;
1311     case Op_MulVL:
1312     case Op_MulReductionVL:
1313       if (VM_Version::supports_avx512dq() == false) {
1314         return false;
1315       }
1316       break;
1317     case Op_AbsVB:
1318     case Op_AbsVS:
1319     case Op_AbsVI:
1320     case Op_AddReductionVI:
1321     case Op_AndReductionV:
1322     case Op_OrReductionV:
1323     case Op_XorReductionV:
1324       if (UseSSE < 3) { // requires at least SSSE3
1325         return false;
1326       }
1327       break;
1328     case Op_MulReductionVI:
1329       if (UseSSE < 4) { // requires at least SSE4
1330         return false;
1331       }
1332       break;
1333     case Op_SqrtVD:
1334     case Op_SqrtVF:
1335       if (UseAVX < 1) { // enabled for AVX only
1336         return false;
1337       }
1338       break;
1339     case Op_CompareAndSwapL:
1340 #ifdef _LP64
1341     case Op_CompareAndSwapP:
1342 #endif
1343       if (!VM_Version::supports_cx8()) {
1344         return false;
1345       }
1346       break;
1347     case Op_CMoveVF:
1348     case Op_CMoveVD:
1349       if (UseAVX < 1 || UseAVX > 2) {
1350         return false;
1351       }
1352       break;
1353     case Op_StrIndexOf:
1354       if (!UseSSE42Intrinsics) {
1355         return false;
1356       }
1357       break;
1358     case Op_StrIndexOfChar:
1359       if (!UseSSE42Intrinsics) {
1360         return false;
1361       }
1362       break;
1363     case Op_OnSpinWait:
1364       if (VM_Version::supports_on_spin_wait() == false) {
1365         return false;
1366       }
1367       break;
1368     case Op_MulVB:
1369     case Op_LShiftVB:
1370     case Op_RShiftVB:
1371     case Op_URShiftVB:
1372       if (UseSSE < 4) {
1373         return false;
1374       }
1375       break;
1376 #ifdef _LP64
1377     case Op_MaxD:
1378     case Op_MaxF:
1379     case Op_MinD:
1380     case Op_MinF:
1381       if (UseAVX < 1) { // enabled for AVX only
1382         return false;
1383       }
1384       break;
1385 #endif
1386     case Op_CacheWB:
1387     case Op_CacheWBPreSync:
1388     case Op_CacheWBPostSync:
1389       if (!VM_Version::supports_data_cache_line_flush()) {
1390         return false;
1391       }
1392       break;
1393     case Op_RoundDoubleMode:
1394       if (UseSSE < 4) {
1395         return false;
1396       }
1397       break;
1398     case Op_RoundDoubleModeV:
1399       if (VM_Version::supports_avx() == false) {
1400         return false; // 128bit vroundpd is not available
1401       }
1402       break;
1403     case Op_MacroLogicV:
1404       if (UseAVX < 3 || !UseVectorMacroLogic) {
1405         return false;
1406       }
1407       break;
1408 #ifndef _LP64
1409     case Op_AddReductionVF:
1410     case Op_AddReductionVD:
1411     case Op_MulReductionVF:
1412     case Op_MulReductionVD:
1413       if (UseSSE < 1) { // requires at least SSE
1414         return false;
1415       }
1416       break;
1417     case Op_MulAddVS2VI:
1418     case Op_RShiftVL:
1419     case Op_AbsVD:
1420     case Op_NegVD:
1421       if (UseSSE < 2) {
1422         return false;
1423       }
1424       break;
1425 #endif // !LP64
1426   }
1427   return true;  // Match rules are supported by default.
1428 }
1429 
1430 //------------------------------------------------------------------------
1431 
// Identify extra cases for which we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
1434 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
1435   if (!match_rule_supported(opcode)) {
1436     return false;
1437   }
1438   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
1439   //   * SSE2 supports 128bit vectors for all types;
1440   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
1441   //   * AVX2 supports 256bit vectors for all types;
1442   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
1443   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
1444   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
1445   // And MaxVectorSize is taken into account as well.
1446   if (!vector_size_supported(bt, vlen)) {
1447     return false;
1448   }
1449   // Special cases which require vector length follow:
1450   //   * implementation limitations
1451   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
1452   //   * 128bit vroundpd instruction is present only in AVX1
1453   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
1454   switch (opcode) {
1455     case Op_AbsVF:
1456     case Op_NegVF:
1457       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
1458         return false; // 512bit vandps and vxorps are not available
1459       }
1460       break;
1461     case Op_AbsVD:
1462     case Op_NegVD:
1463       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
1464         return false; // 512bit vandpd and vxorpd are not available
1465       }
1466       break;
1467     case Op_CMoveVF:
1468       if (vlen != 8) {
1469         return false; // implementation limitation (only vcmov8F_reg is present)
1470       }
1471       break;
1472     case Op_RotateRightV:
1473     case Op_RotateLeftV:
1474     case Op_MacroLogicV:
1475       if (!VM_Version::supports_evex() ||
1476           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
1477         return false;
1478       }
1479       break;
1480     case Op_CMoveVD:
1481       if (vlen != 4) {
1482         return false; // implementation limitation (only vcmov4D_reg is present)
1483       }
1484       break;
1485   }
  return true;  // Match rules are supported by default.
1487 }
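// Two illustrative consequences of the checks above: a 512-bit float abs
// (Op_AbsVF with vlen == 16) is only accepted when AVX512DQ is available,
// and Op_CMoveVF is only accepted for vlen == 8 because vcmov8F_reg is the
// sole implementation provided.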
1488 
1489 // x86 supports generic vector operands: vec and legVec.
1490 const bool Matcher::supports_generic_vector_operands = true;
1491 
1492 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
1493   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
1494   bool legacy = (generic_opnd->opcode() == LEGVEC);
1495   if (!VM_Version::supports_avx512vlbwdq() && // KNL
1496       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
1497     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
1498     return new legVecZOper();
1499   }
1500   if (legacy) {
1501     switch (ideal_reg) {
1502       case Op_VecS: return new legVecSOper();
1503       case Op_VecD: return new legVecDOper();
1504       case Op_VecX: return new legVecXOper();
1505       case Op_VecY: return new legVecYOper();
1506       case Op_VecZ: return new legVecZOper();
1507     }
1508   } else {
1509     switch (ideal_reg) {
1510       case Op_VecS: return new vecSOper();
1511       case Op_VecD: return new vecDOper();
1512       case Op_VecX: return new vecXOper();
1513       case Op_VecY: return new vecYOper();
1514       case Op_VecZ: return new vecZOper();
1515     }
1516   }
1517   ShouldNotReachHere();
1518   return NULL;
1519 }
1520 
1521 bool Matcher::is_generic_reg2reg_move(MachNode* m) {
1522   switch (m->rule()) {
1523     case MoveVec2Leg_rule:
1524     case MoveLeg2Vec_rule:
1525       return true;
1526     default:
1527       return false;
1528   }
1529 }
1530 
1531 bool Matcher::is_generic_vector(MachOper* opnd) {
1532   switch (opnd->opcode()) {
1533     case VEC:
1534     case LEGVEC:
1535       return true;
1536     default:
1537       return false;
1538   }
1539 }
1540 
1541 //------------------------------------------------------------------------
1542 
1543 const bool Matcher::has_predicated_vectors(void) {
1544   bool ret_value = false;
1545   if (UseAVX > 2) {
1546     ret_value = VM_Version::supports_avx512vl();
1547   }
1548 
1549   return ret_value;
1550 }
1551 
1552 const int Matcher::float_pressure(int default_pressure_threshold) {
1553   int float_pressure_threshold = default_pressure_threshold;
1554 #ifdef _LP64
1555   if (UseAVX > 2) {
1556     // Increase pressure threshold on machines with AVX3 which have
1557     // 2x more XMM registers.
1558     float_pressure_threshold = default_pressure_threshold * 2;
1559   }
1560 #endif
1561   return float_pressure_threshold;
1562 }
1563 
1564 // Max vector size in bytes. 0 if not supported.
1565 const int Matcher::vector_width_in_bytes(BasicType bt) {
1566   assert(is_java_primitive(bt), "only primitive type vectors");
1567   if (UseSSE < 2) return 0;
1568   // SSE2 supports 128bit vectors for all types.
1569   // AVX2 supports 256bit vectors for all types.
1570   // AVX2/EVEX supports 512bit vectors for all types.
1571   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
1572   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1573   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1574     size = (UseAVX > 2) ? 64 : 32;
1575   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
1576     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
1577   // Use flag to limit vector size.
1578   size = MIN2(size,(int)MaxVectorSize);
1579   // Minimum 2 values in vector (or 4 for bytes).
1580   switch (bt) {
1581   case T_DOUBLE:
1582   case T_LONG:
1583     if (size < 16) return 0;
1584     break;
1585   case T_FLOAT:
1586   case T_INT:
1587     if (size < 8) return 0;
1588     break;
1589   case T_BOOLEAN:
1590     if (size < 4) return 0;
1591     break;
1592   case T_CHAR:
1593     if (size < 4) return 0;
1594     break;
1595   case T_BYTE:
1596     if (size < 4) return 0;
1597     break;
1598   case T_SHORT:
1599     if (size < 4) return 0;
1600     break;
1601   default:
1602     ShouldNotReachHere();
1603   }
1604   return size;
1605 }
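// For example, with UseAVX == 2 and MaxVectorSize >= 32, T_INT vectors are
// 32 bytes wide; with UseAVX == 0 and UseSSE >= 2 every type is capped at
// 16 bytes; and T_BYTE vectors only reach 64 bytes when AVX512BW is
// available and MaxVectorSize allows it.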
1606 
// Limits on the vector size (number of elements) that can be loaded into a vector.
1608 const int Matcher::max_vector_size(const BasicType bt) {
1609   return vector_width_in_bytes(bt)/type2aelembytes(bt);
1610 }
1611 const int Matcher::min_vector_size(const BasicType bt) {
1612   int max_size = max_vector_size(bt);
  // The minimum size that can be loaded into a vector is 4 bytes.
1614   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
1615   return MIN2(size,max_size);
1616 }
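// For example, when vector_width_in_bytes() reports 64 bytes, the limits for
// T_BYTE are 64 (max) and 4 (min) elements, while for T_LONG they are 8 and 2.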
1617 
1618 // Vector ideal reg corresponding to specified size in bytes
1619 const uint Matcher::vector_ideal_reg(int size) {
1620   assert(MaxVectorSize >= size, "");
1621   switch(size) {
1622     case  4: return Op_VecS;
1623     case  8: return Op_VecD;
1624     case 16: return Op_VecX;
1625     case 32: return Op_VecY;
1626     case 64: return Op_VecZ;
1627   }
1628   ShouldNotReachHere();
1629   return 0;
1630 }
1631 
// x86 supports misaligned vector stores/loads.
1633 const bool Matcher::misaligned_vectors_ok() {
1634   return true;
1635 }
1636 
1637 // x86 AES instructions are compatible with SunJCE expanded
1638 // keys, hence we do not need to pass the original key to stubs
1639 const bool Matcher::pass_original_key_for_aes() {
1640   return false;
1641 }
1642 
1643 
1644 const bool Matcher::convi2l_type_required = true;
1645 
1646 // Check for shift by small constant as well
1647 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
1648   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
1649       shift->in(2)->get_int() <= 3 &&
1650       // Are there other uses besides address expressions?
1651       !matcher->is_visited(shift)) {
1652     address_visited.set(shift->_idx); // Flag as address_visited
1653     mstack.push(shift->in(2), Matcher::Visit);
1654     Node *conv = shift->in(1);
1655 #ifdef _LP64
    // Allow Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
1659     if (conv->Opcode() == Op_ConvI2L &&
1660         conv->as_Type()->type()->is_long()->_lo >= 0 &&
1661         // Are there other uses besides address expressions?
1662         !matcher->is_visited(conv)) {
1663       address_visited.set(conv->_idx); // Flag as address_visited
1664       mstack.push(conv->in(1), Matcher::Pre_Visit);
1665     } else
1666 #endif
1667       mstack.push(conv, Matcher::Pre_Visit);
1668     return true;
1669   }
1670   return false;
1671 }
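// A shift by a constant of at most 3 corresponds to an x86 scale factor of
// 1, 2, 4 or 8, so an index expression such as (base + (i << 2)) can be
// folded into a scaled addressing mode like [base + i*4] instead of being
// computed into a separate register.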
1672 
// This function identifies sub-graphs in which a 'load' node is
// an input to two different nodes and can therefore be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
1677 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
1678 // refers to the same node.
1679 //
1680 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
1681 // This is a temporary solution until we make DAGs expressible in ADL.
1682 template<typename ConType>
1683 class FusedPatternMatcher {
1684   Node* _op1_node;
1685   Node* _mop_node;
1686   int _con_op;
1687 
1688   static int match_next(Node* n, int next_op, int next_op_idx) {
1689     if (n->in(1) == NULL || n->in(2) == NULL) {
1690       return -1;
1691     }
1692 
1693     if (next_op_idx == -1) { // n is commutative, try rotations
1694       if (n->in(1)->Opcode() == next_op) {
1695         return 1;
1696       } else if (n->in(2)->Opcode() == next_op) {
1697         return 2;
1698       }
1699     } else {
1700       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
1701       if (n->in(next_op_idx)->Opcode() == next_op) {
1702         return next_op_idx;
1703       }
1704     }
1705     return -1;
1706   }
1707 
1708  public:
1709   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
1710     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
1711 
1712   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
1713              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
1714              typename ConType::NativeType con_value) {
1715     if (_op1_node->Opcode() != op1) {
1716       return false;
1717     }
1718     if (_mop_node->outcnt() > 2) {
1719       return false;
1720     }
1721     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
1722     if (op1_op2_idx == -1) {
1723       return false;
1724     }
1725     // Memory operation must be the other edge
1726     int op1_mop_idx = (op1_op2_idx & 1) + 1;
1727 
1728     // Check that the mop node is really what we want
1729     if (_op1_node->in(op1_mop_idx) == _mop_node) {
1730       Node* op2_node = _op1_node->in(op1_op2_idx);
1731       if (op2_node->outcnt() > 1) {
1732         return false;
1733       }
1734       assert(op2_node->Opcode() == op2, "Should be");
1735       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
1736       if (op2_con_idx == -1) {
1737         return false;
1738       }
1739       // Memory operation must be the other edge
1740       int op2_mop_idx = (op2_con_idx & 1) + 1;
1741       // Check that the memory operation is the same node
1742       if (op2_node->in(op2_mop_idx) == _mop_node) {
1743         // Now check the constant
1744         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
1745         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
1746           return true;
1747         }
1748       }
1749     }
1750     return false;
1751   }
1752 };
1753 
1754 static bool is_bmi_pattern(Node* n, Node* m) {
1755   assert(UseBMI1Instructions, "sanity");
1756   if (n != NULL && m != NULL) {
1757     if (m->Opcode() == Op_LoadI) {
1758       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
1759       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
1760              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
1761              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
1762     } else if (m->Opcode() == Op_LoadL) {
1763       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
1764       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
1765              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
1766              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
1767     }
1768   }
1769   return false;
1770 }
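// As a concrete instance of the pattern above: blsi computes dst = src & -src,
// isolating the lowest set bit of src (e.g. 0b10100 becomes 0b00100), which is
// exactly the (AndI (SubI 0 LoadI) LoadI) shape checked by bmii.match above.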
1771 
1772 // Should the matcher clone input 'm' of node 'n'?
1773 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
1774   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
1775   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
1776     mstack.push(m, Visit);
1777     return true;
1778   }
1779   return false;
1780 }
1781 
1782 // Should the Matcher clone shifts on addressing modes, expecting them
1783 // to be subsumed into complex addressing expressions or compute them
1784 // into registers?
1785 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
1786   Node *off = m->in(AddPNode::Offset);
1787   if (off->is_Con()) {
1788     address_visited.test_set(m->_idx); // Flag as address_visited
1789     Node *adr = m->in(AddPNode::Address);
1790 
1791     // Intel can handle 2 adds in addressing mode
1792     // AtomicAdd is not an addressing expression.
1793     // Cheap to find it by looking for screwy base.
1794     if (adr->is_AddP() &&
1795         !adr->in(AddPNode::Base)->is_top() &&
1796         LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
1797         // Are there other uses besides address expressions?
1798         !is_visited(adr)) {
1799       address_visited.set(adr->_idx); // Flag as address_visited
1800       Node *shift = adr->in(AddPNode::Offset);
1801       if (!clone_shift(shift, this, mstack, address_visited)) {
1802         mstack.push(shift, Pre_Visit);
1803       }
1804       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
1805       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
1806     } else {
1807       mstack.push(adr, Pre_Visit);
1808     }
1809 
1810     // Clone X+offset as it also folds into most addressing expressions
1811     mstack.push(off, Visit);
1812     mstack.push(m->in(AddPNode::Base), Pre_Visit);
1813     return true;
1814   } else if (clone_shift(off, this, mstack, address_visited)) {
1815     address_visited.test_set(m->_idx); // Flag as address_visited
1816     mstack.push(m->in(AddPNode::Address), Pre_Visit);
1817     mstack.push(m->in(AddPNode::Base), Pre_Visit);
1818     return true;
1819   }
1820   return false;
1821 }
1822 
1823 void Compile::reshape_address(AddPNode* addp) {
1824 }
1825 
1826 static inline uint vector_length(const MachNode* n) {
1827   const TypeVect* vt = n->bottom_type()->is_vect();
1828   return vt->length();
1829 }
1830 
1831 static inline uint vector_length(const MachNode* use, MachOper* opnd) {
1832   uint def_idx = use->operand_index(opnd);
1833   Node* def = use->in(def_idx);
1834   return def->bottom_type()->is_vect()->length();
1835 }
1836 
1837 static inline uint vector_length_in_bytes(const MachNode* n) {
1838   const TypeVect* vt = n->bottom_type()->is_vect();
1839   return vt->length_in_bytes();
1840 }
1841 
1842 static inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) {
1843   uint def_idx = use->operand_index(opnd);
1844   Node* def = use->in(def_idx);
1845   return def->bottom_type()->is_vect()->length_in_bytes();
1846 }
1847 
1848 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* n) {
1849   switch(vector_length_in_bytes(n)) {
1850     case  4: // fall-through
1851     case  8: // fall-through
1852     case 16: return Assembler::AVX_128bit;
1853     case 32: return Assembler::AVX_256bit;
1854     case 64: return Assembler::AVX_512bit;
1855 
1856     default: {
1857       ShouldNotReachHere();
1858       return Assembler::AVX_NoVec;
1859     }
1860   }
1861 }
1862 
1863 // Helper methods for MachSpillCopyNode::implementation().
1864 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1865                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so instructions are
  // emitted into a scratch buffer to determine their size.
1868   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1869   assert(ireg == Op_VecS || // 32bit vector
1870          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1871          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1872          "no non-adjacent vector moves" );
1873   if (cbuf) {
1874     C2_MacroAssembler _masm(cbuf);
1875     int offset = __ offset();
1876     switch (ireg) {
1877     case Op_VecS: // copy whole register
1878     case Op_VecD:
1879     case Op_VecX:
1880 #ifndef _LP64
1881       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1882 #else
1883       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1884         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1885       } else {
1886         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
1888 #endif
1889       break;
1890     case Op_VecY:
1891 #ifndef _LP64
1892       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1893 #else
1894       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1895         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1896       } else {
1897         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
1899 #endif
1900       break;
1901     case Op_VecZ:
1902       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1903       break;
1904     default:
1905       ShouldNotReachHere();
1906     }
1907     int size = __ offset() - offset;
1908 #ifdef ASSERT
1909     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
1911 #endif
1912     return size;
1913 #ifndef PRODUCT
1914   } else if (!do_size) {
1915     switch (ireg) {
1916     case Op_VecS:
1917     case Op_VecD:
1918     case Op_VecX:
1919       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1920       break;
1921     case Op_VecY:
1922     case Op_VecZ:
1923       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1924       break;
1925     default:
1926       ShouldNotReachHere();
1927     }
1928 #endif
1929   }
1930   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
1931   return (UseAVX > 2) ? 6 : 4;
1932 }
1933 
1934 int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1935                      int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so instructions are
  // emitted into a scratch buffer to determine their size.
1938   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1939   if (cbuf) {
1940     C2_MacroAssembler _masm(cbuf);
1941     int offset = __ offset();
1942     if (is_load) {
1943       switch (ireg) {
1944       case Op_VecS:
1945         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1946         break;
1947       case Op_VecD:
1948         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1949         break;
1950       case Op_VecX:
1951 #ifndef _LP64
1952         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1953 #else
1954         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1955           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1956         } else {
1957           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1958           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
1959         }
1960 #endif
1961         break;
1962       case Op_VecY:
1963 #ifndef _LP64
1964         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1965 #else
1966         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1967           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1968         } else {
1969           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1970           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
1971         }
1972 #endif
1973         break;
1974       case Op_VecZ:
1975         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1976         break;
1977       default:
1978         ShouldNotReachHere();
1979       }
1980     } else { // store
1981       switch (ireg) {
1982       case Op_VecS:
1983         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1984         break;
1985       case Op_VecD:
1986         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1987         break;
1988       case Op_VecX:
1989 #ifndef _LP64
1990         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1991 #else
1992         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1993           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
1996           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
1997         }
1998 #endif
1999         break;
2000       case Op_VecY:
2001 #ifndef _LP64
2002         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2003 #else
2004         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2005           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
2008           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
2009         }
2010 #endif
2011         break;
2012       case Op_VecZ:
2013         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2014         break;
2015       default:
2016         ShouldNotReachHere();
2017       }
2018     }
2019     int size = __ offset() - offset;
2020 #ifdef ASSERT
2021     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
2022     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
2024 #endif
2025     return size;
2026 #ifndef PRODUCT
2027   } else if (!do_size) {
2028     if (is_load) {
2029       switch (ireg) {
2030       case Op_VecS:
2031         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2032         break;
2033       case Op_VecD:
2034         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2035         break;
2036        case Op_VecX:
2037         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2038         break;
2039       case Op_VecY:
2040       case Op_VecZ:
2041         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2042         break;
2043       default:
2044         ShouldNotReachHere();
2045       }
2046     } else { // store
2047       switch (ireg) {
2048       case Op_VecS:
2049         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2050         break;
2051       case Op_VecD:
2052         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2053         break;
2054        case Op_VecX:
2055         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2056         break;
2057       case Op_VecY:
2058       case Op_VecZ:
2059         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2060         break;
2061       default:
2062         ShouldNotReachHere();
2063       }
2064     }
2065 #endif
2066   }
2067   bool is_single_byte = false;
2068   int vec_len = 0;
2069   if ((UseAVX > 2) && (stack_offset != 0)) {
2070     int tuple_type = Assembler::EVEX_FVM;
2071     int input_size = Assembler::EVEX_32bit;
2072     switch (ireg) {
2073     case Op_VecS:
2074       tuple_type = Assembler::EVEX_T1S;
2075       break;
2076     case Op_VecD:
2077       tuple_type = Assembler::EVEX_T1S;
2078       input_size = Assembler::EVEX_64bit;
2079       break;
2080     case Op_VecX:
2081       break;
2082     case Op_VecY:
2083       vec_len = 1;
2084       break;
2085     case Op_VecZ:
2086       vec_len = 2;
2087       break;
2088     }
2089     is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
2090   }
2091   int offset_size = 0;
2092   int size = 5;
2093   if (UseAVX > 2 ) {
2094     if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
2095       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
2096       size += 2; // Need an additional two bytes for EVEX encoding
2097     } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
2098       offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
2099     } else {
2100       offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
2102     }
2103   } else {
2104     offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
2105   }
2106   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
2107   return size+offset_size;
2108 }
2109 
2110 static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
2112   assert(width == 1 || width == 2, "only byte or short types here");
2113   int bit_width = width * 8;
2114   jint val = con;
2115   val &= (1 << bit_width) - 1;  // mask off sign bits
2116   while(bit_width < 32) {
2117     val |= (val << bit_width);
2118     bit_width <<= 1;
2119   }
2120   return val;
2121 }
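// For example, replicate4_imm(0x5A, 1) yields 0x5A5A5A5A and
// replicate4_imm(0x1234, 2) yields 0x12341234.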
2122 
2123 static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
2125   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
2126   int bit_width = width * 8;
2127   jlong val = con;
2128   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
2129   while(bit_width < 64) {
2130     val |= (val << bit_width);
2131     bit_width <<= 1;
2132   }
2133   return val;
2134 }
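// For example, replicate8_imm(0x5A, 1) yields 0x5A5A5A5A5A5A5A5A and
// replicate8_imm(0x7FFF, 2) yields 0x7FFF7FFF7FFF7FFF.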
2135 
2136 #ifndef PRODUCT
2137   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2138     st->print("nop \t# %d bytes pad for loops and calls", _count);
2139   }
2140 #endif
2141 
2142   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2143     C2_MacroAssembler _masm(&cbuf);
2144     __ nop(_count);
2145   }
2146 
2147   uint MachNopNode::size(PhaseRegAlloc*) const {
2148     return _count;
2149   }
2150 
2151 #ifndef PRODUCT
2152   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
2153     st->print("# breakpoint");
2154   }
2155 #endif
2156 
2157   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
2158     C2_MacroAssembler _masm(&cbuf);
2159     __ int3();
2160   }
2161 
2162   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
2163     return MachNode::size(ra_);
2164   }
2165 
2166 %}
2167 
2168 encode %{
2169 
2170   enc_class call_epilog %{
2171     if (VerifyStackAtCalls) {
2172       // Check that stack depth is unchanged: find majik cookie on stack
2173       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2174       C2_MacroAssembler _masm(&cbuf);
2175       Label L;
2176       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2177       __ jccb(Assembler::equal, L);
2178       // Die if stack mismatch
2179       __ int3();
2180       __ bind(L);
2181     }
2182   %}
2183 
2184 %}
2185 
2186 
2187 //----------OPERANDS-----------------------------------------------------------
2188 // Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user-defined types which are used in
2190 // instruction definitions.
2191 
2192 // Vectors
2193 
2194 // Dummy generic vector class. Should be used for all vector operands.
2195 // Replaced with vec[SDXYZ] during post-selection pass.
2196 operand vec() %{
2197   constraint(ALLOC_IN_RC(dynamic));
2198   match(VecX);
2199   match(VecY);
2200   match(VecZ);
2201   match(VecS);
2202   match(VecD);
2203 
2204   format %{ %}
2205   interface(REG_INTER);
2206 %}
2207 
2208 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
2209 // Replaced with legVec[SDXYZ] during post-selection cleanup.
2210 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
2211 // runtime code generation via reg_class_dynamic.
2212 operand legVec() %{
2213   constraint(ALLOC_IN_RC(dynamic));
2214   match(VecX);
2215   match(VecY);
2216   match(VecZ);
2217   match(VecS);
2218   match(VecD);
2219 
2220   format %{ %}
2221   interface(REG_INTER);
2222 %}
2223 
2224 // Replaces vec during post-selection cleanup. See above.
2225 operand vecS() %{
2226   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
2227   match(VecS);
2228 
2229   format %{ %}
2230   interface(REG_INTER);
2231 %}
2232 
2233 // Replaces legVec during post-selection cleanup. See above.
2234 operand legVecS() %{
2235   constraint(ALLOC_IN_RC(vectors_reg_legacy));
2236   match(VecS);
2237 
2238   format %{ %}
2239   interface(REG_INTER);
2240 %}
2241 
2242 // Replaces vec during post-selection cleanup. See above.
2243 operand vecD() %{
2244   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
2245   match(VecD);
2246 
2247   format %{ %}
2248   interface(REG_INTER);
2249 %}
2250 
2251 // Replaces legVec during post-selection cleanup. See above.
2252 operand legVecD() %{
2253   constraint(ALLOC_IN_RC(vectord_reg_legacy));
2254   match(VecD);
2255 
2256   format %{ %}
2257   interface(REG_INTER);
2258 %}
2259 
2260 // Replaces vec during post-selection cleanup. See above.
2261 operand vecX() %{
2262   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
2263   match(VecX);
2264 
2265   format %{ %}
2266   interface(REG_INTER);
2267 %}
2268 
2269 // Replaces legVec during post-selection cleanup. See above.
2270 operand legVecX() %{
2271   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
2272   match(VecX);
2273 
2274   format %{ %}
2275   interface(REG_INTER);
2276 %}
2277 
2278 // Replaces vec during post-selection cleanup. See above.
2279 operand vecY() %{
2280   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
2281   match(VecY);
2282 
2283   format %{ %}
2284   interface(REG_INTER);
2285 %}
2286 
2287 // Replaces legVec during post-selection cleanup. See above.
2288 operand legVecY() %{
2289   constraint(ALLOC_IN_RC(vectory_reg_legacy));
2290   match(VecY);
2291 
2292   format %{ %}
2293   interface(REG_INTER);
2294 %}
2295 
2296 // Replaces vec during post-selection cleanup. See above.
2297 operand vecZ() %{
2298   constraint(ALLOC_IN_RC(vectorz_reg));
2299   match(VecZ);
2300 
2301   format %{ %}
2302   interface(REG_INTER);
2303 %}
2304 
2305 // Replaces legVec during post-selection cleanup. See above.
2306 operand legVecZ() %{
2307   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
2308   match(VecZ);
2309 
2310   format %{ %}
2311   interface(REG_INTER);
2312 %}
2313 
2314 // Comparison Code for FP conditional move
2315 operand cmpOp_vcmppd() %{
2316   match(Bool);
2317 
2318   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
2319             n->as_Bool()->_test._test != BoolTest::no_overflow);
2320   format %{ "" %}
2321   interface(COND_INTER) %{
2322     equal        (0x0, "eq");
2323     less         (0x1, "lt");
2324     less_equal   (0x2, "le");
2325     not_equal    (0xC, "ne");
2326     greater_equal(0xD, "ge");
2327     greater      (0xE, "gt");
    // TODO: the ADLC cannot compile this operand without the next two lines;
    // it fails with:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
2331     overflow     (0x20, "o");  // not really supported by the instruction
2332     no_overflow  (0x21, "no"); // not really supported by the instruction
2333   %}
2334 %}
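// For reference: the hex values above line up with the AVX floating-point
// compare-predicate immediates (0x0 EQ_OQ, 0x1 LT_OS, 0x2 LE_OS, 0xC NEQ_OQ,
// 0xD GE_OS, 0xE GT_OS); the overflow/no_overflow entries only satisfy the
// ADLC and are excluded by the predicate above.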
2335 
2336 
// INSTRUCTIONS -- Platform-independent definitions (same for 32- and 64-bit)
2338 
2339 // ============================================================================
2340 
2341 instruct ShouldNotReachHere() %{
2342   match(Halt);
2343   format %{ "stop\t# ShouldNotReachHere" %}
2344   ins_encode %{
2345     if (is_reachable()) {
2346       __ stop(_halt_reason);
2347     }
2348   %}
2349   ins_pipe(pipe_slow);
2350 %}
2351 
2352 // =================================EVEX special===============================
2353 
2354 instruct setMask(rRegI dst, rRegI src) %{
2355   predicate(Matcher::has_predicated_vectors());
2356   match(Set dst (SetVectMaskI  src));
2357   effect(TEMP dst);
2358   format %{ "setvectmask   $dst, $src" %}
2359   ins_encode %{
2360     __ setvectmask($dst$$Register, $src$$Register);
2361   %}
2362   ins_pipe(pipe_slow);
2363 %}
2364 
2365 // ============================================================================
2366 
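// Scalar float/double arithmetic. The two-operand SSE forms below are
// destructive (dst doubles as the first source) and are matched only when AVX
// is disabled; with AVX the three-operand VEX forms (vaddss, vsubsd, ...) are
// matched instead and leave both sources intact.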
2367 instruct addF_reg(regF dst, regF src) %{
2368   predicate((UseSSE>=1) && (UseAVX == 0));
2369   match(Set dst (AddF dst src));
2370 
2371   format %{ "addss   $dst, $src" %}
2372   ins_cost(150);
2373   ins_encode %{
2374     __ addss($dst$$XMMRegister, $src$$XMMRegister);
2375   %}
2376   ins_pipe(pipe_slow);
2377 %}
2378 
2379 instruct addF_mem(regF dst, memory src) %{
2380   predicate((UseSSE>=1) && (UseAVX == 0));
2381   match(Set dst (AddF dst (LoadF src)));
2382 
2383   format %{ "addss   $dst, $src" %}
2384   ins_cost(150);
2385   ins_encode %{
2386     __ addss($dst$$XMMRegister, $src$$Address);
2387   %}
2388   ins_pipe(pipe_slow);
2389 %}
2390 
2391 instruct addF_imm(regF dst, immF con) %{
2392   predicate((UseSSE>=1) && (UseAVX == 0));
2393   match(Set dst (AddF dst con));
2394   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2395   ins_cost(150);
2396   ins_encode %{
2397     __ addss($dst$$XMMRegister, $constantaddress($con));
2398   %}
2399   ins_pipe(pipe_slow);
2400 %}
2401 
2402 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
2403   predicate(UseAVX > 0);
2404   match(Set dst (AddF src1 src2));
2405 
2406   format %{ "vaddss  $dst, $src1, $src2" %}
2407   ins_cost(150);
2408   ins_encode %{
2409     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2410   %}
2411   ins_pipe(pipe_slow);
2412 %}
2413 
2414 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
2415   predicate(UseAVX > 0);
2416   match(Set dst (AddF src1 (LoadF src2)));
2417 
2418   format %{ "vaddss  $dst, $src1, $src2" %}
2419   ins_cost(150);
2420   ins_encode %{
2421     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2422   %}
2423   ins_pipe(pipe_slow);
2424 %}
2425 
2426 instruct addF_reg_imm(regF dst, regF src, immF con) %{
2427   predicate(UseAVX > 0);
2428   match(Set dst (AddF src con));
2429 
2430   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2431   ins_cost(150);
2432   ins_encode %{
2433     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2434   %}
2435   ins_pipe(pipe_slow);
2436 %}
2437 
2438 instruct addD_reg(regD dst, regD src) %{
2439   predicate((UseSSE>=2) && (UseAVX == 0));
2440   match(Set dst (AddD dst src));
2441 
2442   format %{ "addsd   $dst, $src" %}
2443   ins_cost(150);
2444   ins_encode %{
2445     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
2446   %}
2447   ins_pipe(pipe_slow);
2448 %}
2449 
2450 instruct addD_mem(regD dst, memory src) %{
2451   predicate((UseSSE>=2) && (UseAVX == 0));
2452   match(Set dst (AddD dst (LoadD src)));
2453 
2454   format %{ "addsd   $dst, $src" %}
2455   ins_cost(150);
2456   ins_encode %{
2457     __ addsd($dst$$XMMRegister, $src$$Address);
2458   %}
2459   ins_pipe(pipe_slow);
2460 %}
2461 
2462 instruct addD_imm(regD dst, immD con) %{
2463   predicate((UseSSE>=2) && (UseAVX == 0));
2464   match(Set dst (AddD dst con));
2465   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2466   ins_cost(150);
2467   ins_encode %{
2468     __ addsd($dst$$XMMRegister, $constantaddress($con));
2469   %}
2470   ins_pipe(pipe_slow);
2471 %}
2472 
2473 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
2474   predicate(UseAVX > 0);
2475   match(Set dst (AddD src1 src2));
2476 
2477   format %{ "vaddsd  $dst, $src1, $src2" %}
2478   ins_cost(150);
2479   ins_encode %{
2480     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2481   %}
2482   ins_pipe(pipe_slow);
2483 %}
2484 
2485 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
2486   predicate(UseAVX > 0);
2487   match(Set dst (AddD src1 (LoadD src2)));
2488 
2489   format %{ "vaddsd  $dst, $src1, $src2" %}
2490   ins_cost(150);
2491   ins_encode %{
2492     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2493   %}
2494   ins_pipe(pipe_slow);
2495 %}
2496 
2497 instruct addD_reg_imm(regD dst, regD src, immD con) %{
2498   predicate(UseAVX > 0);
2499   match(Set dst (AddD src con));
2500 
2501   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2502   ins_cost(150);
2503   ins_encode %{
2504     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2505   %}
2506   ins_pipe(pipe_slow);
2507 %}
2508 
2509 instruct subF_reg(regF dst, regF src) %{
2510   predicate((UseSSE>=1) && (UseAVX == 0));
2511   match(Set dst (SubF dst src));
2512 
2513   format %{ "subss   $dst, $src" %}
2514   ins_cost(150);
2515   ins_encode %{
2516     __ subss($dst$$XMMRegister, $src$$XMMRegister);
2517   %}
2518   ins_pipe(pipe_slow);
2519 %}
2520 
2521 instruct subF_mem(regF dst, memory src) %{
2522   predicate((UseSSE>=1) && (UseAVX == 0));
2523   match(Set dst (SubF dst (LoadF src)));
2524 
2525   format %{ "subss   $dst, $src" %}
2526   ins_cost(150);
2527   ins_encode %{
2528     __ subss($dst$$XMMRegister, $src$$Address);
2529   %}
2530   ins_pipe(pipe_slow);
2531 %}
2532 
2533 instruct subF_imm(regF dst, immF con) %{
2534   predicate((UseSSE>=1) && (UseAVX == 0));
2535   match(Set dst (SubF dst con));
2536   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2537   ins_cost(150);
2538   ins_encode %{
2539     __ subss($dst$$XMMRegister, $constantaddress($con));
2540   %}
2541   ins_pipe(pipe_slow);
2542 %}
2543 
2544 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
2545   predicate(UseAVX > 0);
2546   match(Set dst (SubF src1 src2));
2547 
2548   format %{ "vsubss  $dst, $src1, $src2" %}
2549   ins_cost(150);
2550   ins_encode %{
2551     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2552   %}
2553   ins_pipe(pipe_slow);
2554 %}
2555 
2556 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
2557   predicate(UseAVX > 0);
2558   match(Set dst (SubF src1 (LoadF src2)));
2559 
2560   format %{ "vsubss  $dst, $src1, $src2" %}
2561   ins_cost(150);
2562   ins_encode %{
2563     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2564   %}
2565   ins_pipe(pipe_slow);
2566 %}
2567 
2568 instruct subF_reg_imm(regF dst, regF src, immF con) %{
2569   predicate(UseAVX > 0);
2570   match(Set dst (SubF src con));
2571 
2572   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2573   ins_cost(150);
2574   ins_encode %{
2575     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2576   %}
2577   ins_pipe(pipe_slow);
2578 %}
2579 
2580 instruct subD_reg(regD dst, regD src) %{
2581   predicate((UseSSE>=2) && (UseAVX == 0));
2582   match(Set dst (SubD dst src));
2583 
2584   format %{ "subsd   $dst, $src" %}
2585   ins_cost(150);
2586   ins_encode %{
2587     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
2588   %}
2589   ins_pipe(pipe_slow);
2590 %}
2591 
2592 instruct subD_mem(regD dst, memory src) %{
2593   predicate((UseSSE>=2) && (UseAVX == 0));
2594   match(Set dst (SubD dst (LoadD src)));
2595 
2596   format %{ "subsd   $dst, $src" %}
2597   ins_cost(150);
2598   ins_encode %{
2599     __ subsd($dst$$XMMRegister, $src$$Address);
2600   %}
2601   ins_pipe(pipe_slow);
2602 %}
2603 
2604 instruct subD_imm(regD dst, immD con) %{
2605   predicate((UseSSE>=2) && (UseAVX == 0));
2606   match(Set dst (SubD dst con));
2607   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2608   ins_cost(150);
2609   ins_encode %{
2610     __ subsd($dst$$XMMRegister, $constantaddress($con));
2611   %}
2612   ins_pipe(pipe_slow);
2613 %}
2614 
2615 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
2616   predicate(UseAVX > 0);
2617   match(Set dst (SubD src1 src2));
2618 
2619   format %{ "vsubsd  $dst, $src1, $src2" %}
2620   ins_cost(150);
2621   ins_encode %{
2622     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2623   %}
2624   ins_pipe(pipe_slow);
2625 %}
2626 
2627 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
2628   predicate(UseAVX > 0);
2629   match(Set dst (SubD src1 (LoadD src2)));
2630 
2631   format %{ "vsubsd  $dst, $src1, $src2" %}
2632   ins_cost(150);
2633   ins_encode %{
2634     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2635   %}
2636   ins_pipe(pipe_slow);
2637 %}
2638 
2639 instruct subD_reg_imm(regD dst, regD src, immD con) %{
2640   predicate(UseAVX > 0);
2641   match(Set dst (SubD src con));
2642 
2643   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2644   ins_cost(150);
2645   ins_encode %{
2646     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2647   %}
2648   ins_pipe(pipe_slow);
2649 %}
2650 
2651 instruct mulF_reg(regF dst, regF src) %{
2652   predicate((UseSSE>=1) && (UseAVX == 0));
2653   match(Set dst (MulF dst src));
2654 
2655   format %{ "mulss   $dst, $src" %}
2656   ins_cost(150);
2657   ins_encode %{
2658     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
2659   %}
2660   ins_pipe(pipe_slow);
2661 %}
2662 
2663 instruct mulF_mem(regF dst, memory src) %{
2664   predicate((UseSSE>=1) && (UseAVX == 0));
2665   match(Set dst (MulF dst (LoadF src)));
2666 
2667   format %{ "mulss   $dst, $src" %}
2668   ins_cost(150);
2669   ins_encode %{
2670     __ mulss($dst$$XMMRegister, $src$$Address);
2671   %}
2672   ins_pipe(pipe_slow);
2673 %}
2674 
2675 instruct mulF_imm(regF dst, immF con) %{
2676   predicate((UseSSE>=1) && (UseAVX == 0));
2677   match(Set dst (MulF dst con));
2678   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2679   ins_cost(150);
2680   ins_encode %{
2681     __ mulss($dst$$XMMRegister, $constantaddress($con));
2682   %}
2683   ins_pipe(pipe_slow);
2684 %}
2685 
2686 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
2687   predicate(UseAVX > 0);
2688   match(Set dst (MulF src1 src2));
2689 
2690   format %{ "vmulss  $dst, $src1, $src2" %}
2691   ins_cost(150);
2692   ins_encode %{
2693     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2694   %}
2695   ins_pipe(pipe_slow);
2696 %}
2697 
2698 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
2699   predicate(UseAVX > 0);
2700   match(Set dst (MulF src1 (LoadF src2)));
2701 
2702   format %{ "vmulss  $dst, $src1, $src2" %}
2703   ins_cost(150);
2704   ins_encode %{
2705     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2706   %}
2707   ins_pipe(pipe_slow);
2708 %}
2709 
2710 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
2711   predicate(UseAVX > 0);
2712   match(Set dst (MulF src con));
2713 
2714   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2715   ins_cost(150);
2716   ins_encode %{
2717     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2718   %}
2719   ins_pipe(pipe_slow);
2720 %}
2721 
2722 instruct mulD_reg(regD dst, regD src) %{
2723   predicate((UseSSE>=2) && (UseAVX == 0));
2724   match(Set dst (MulD dst src));
2725 
2726   format %{ "mulsd   $dst, $src" %}
2727   ins_cost(150);
2728   ins_encode %{
2729     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
2730   %}
2731   ins_pipe(pipe_slow);
2732 %}
2733 
2734 instruct mulD_mem(regD dst, memory src) %{
2735   predicate((UseSSE>=2) && (UseAVX == 0));
2736   match(Set dst (MulD dst (LoadD src)));
2737 
2738   format %{ "mulsd   $dst, $src" %}
2739   ins_cost(150);
2740   ins_encode %{
2741     __ mulsd($dst$$XMMRegister, $src$$Address);
2742   %}
2743   ins_pipe(pipe_slow);
2744 %}
2745 
2746 instruct mulD_imm(regD dst, immD con) %{
2747   predicate((UseSSE>=2) && (UseAVX == 0));
2748   match(Set dst (MulD dst con));
2749   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2750   ins_cost(150);
2751   ins_encode %{
2752     __ mulsd($dst$$XMMRegister, $constantaddress($con));
2753   %}
2754   ins_pipe(pipe_slow);
2755 %}
2756 
2757 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
2758   predicate(UseAVX > 0);
2759   match(Set dst (MulD src1 src2));
2760 
2761   format %{ "vmulsd  $dst, $src1, $src2" %}
2762   ins_cost(150);
2763   ins_encode %{
2764     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2765   %}
2766   ins_pipe(pipe_slow);
2767 %}
2768 
2769 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
2770   predicate(UseAVX > 0);
2771   match(Set dst (MulD src1 (LoadD src2)));
2772 
2773   format %{ "vmulsd  $dst, $src1, $src2" %}
2774   ins_cost(150);
2775   ins_encode %{
2776     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2777   %}
2778   ins_pipe(pipe_slow);
2779 %}
2780 
2781 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
2782   predicate(UseAVX > 0);
2783   match(Set dst (MulD src con));
2784 
2785   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2786   ins_cost(150);
2787   ins_encode %{
2788     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2789   %}
2790   ins_pipe(pipe_slow);
2791 %}
2792 
2793 instruct divF_reg(regF dst, regF src) %{
2794   predicate((UseSSE>=1) && (UseAVX == 0));
2795   match(Set dst (DivF dst src));
2796 
2797   format %{ "divss   $dst, $src" %}
2798   ins_cost(150);
2799   ins_encode %{
2800     __ divss($dst$$XMMRegister, $src$$XMMRegister);
2801   %}
2802   ins_pipe(pipe_slow);
2803 %}
2804 
2805 instruct divF_mem(regF dst, memory src) %{
2806   predicate((UseSSE>=1) && (UseAVX == 0));
2807   match(Set dst (DivF dst (LoadF src)));
2808 
2809   format %{ "divss   $dst, $src" %}
2810   ins_cost(150);
2811   ins_encode %{
2812     __ divss($dst$$XMMRegister, $src$$Address);
2813   %}
2814   ins_pipe(pipe_slow);
2815 %}
2816 
2817 instruct divF_imm(regF dst, immF con) %{
2818   predicate((UseSSE>=1) && (UseAVX == 0));
2819   match(Set dst (DivF dst con));
2820   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2821   ins_cost(150);
2822   ins_encode %{
2823     __ divss($dst$$XMMRegister, $constantaddress($con));
2824   %}
2825   ins_pipe(pipe_slow);
2826 %}
2827 
2828 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
2829   predicate(UseAVX > 0);
2830   match(Set dst (DivF src1 src2));
2831 
2832   format %{ "vdivss  $dst, $src1, $src2" %}
2833   ins_cost(150);
2834   ins_encode %{
2835     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2836   %}
2837   ins_pipe(pipe_slow);
2838 %}
2839 
2840 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
2841   predicate(UseAVX > 0);
2842   match(Set dst (DivF src1 (LoadF src2)));
2843 
2844   format %{ "vdivss  $dst, $src1, $src2" %}
2845   ins_cost(150);
2846   ins_encode %{
2847     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2848   %}
2849   ins_pipe(pipe_slow);
2850 %}
2851 
2852 instruct divF_reg_imm(regF dst, regF src, immF con) %{
2853   predicate(UseAVX > 0);
2854   match(Set dst (DivF src con));
2855 
2856   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2857   ins_cost(150);
2858   ins_encode %{
2859     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2860   %}
2861   ins_pipe(pipe_slow);
2862 %}
2863 
2864 instruct divD_reg(regD dst, regD src) %{
2865   predicate((UseSSE>=2) && (UseAVX == 0));
2866   match(Set dst (DivD dst src));
2867 
2868   format %{ "divsd   $dst, $src" %}
2869   ins_cost(150);
2870   ins_encode %{
2871     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
2872   %}
2873   ins_pipe(pipe_slow);
2874 %}
2875 
2876 instruct divD_mem(regD dst, memory src) %{
2877   predicate((UseSSE>=2) && (UseAVX == 0));
2878   match(Set dst (DivD dst (LoadD src)));
2879 
2880   format %{ "divsd   $dst, $src" %}
2881   ins_cost(150);
2882   ins_encode %{
2883     __ divsd($dst$$XMMRegister, $src$$Address);
2884   %}
2885   ins_pipe(pipe_slow);
2886 %}
2887 
2888 instruct divD_imm(regD dst, immD con) %{
2889   predicate((UseSSE>=2) && (UseAVX == 0));
2890   match(Set dst (DivD dst con));
2891   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2892   ins_cost(150);
2893   ins_encode %{
2894     __ divsd($dst$$XMMRegister, $constantaddress($con));
2895   %}
2896   ins_pipe(pipe_slow);
2897 %}
2898 
2899 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
2900   predicate(UseAVX > 0);
2901   match(Set dst (DivD src1 src2));
2902 
2903   format %{ "vdivsd  $dst, $src1, $src2" %}
2904   ins_cost(150);
2905   ins_encode %{
2906     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2907   %}
2908   ins_pipe(pipe_slow);
2909 %}
2910 
2911 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
2912   predicate(UseAVX > 0);
2913   match(Set dst (DivD src1 (LoadD src2)));
2914 
2915   format %{ "vdivsd  $dst, $src1, $src2" %}
2916   ins_cost(150);
2917   ins_encode %{
2918     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2919   %}
2920   ins_pipe(pipe_slow);
2921 %}
2922 
2923 instruct divD_reg_imm(regD dst, regD src, immD con) %{
2924   predicate(UseAVX > 0);
2925   match(Set dst (DivD src con));
2926 
2927   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
2928   ins_cost(150);
2929   ins_encode %{
2930     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2931   %}
2932   ins_pipe(pipe_slow);
2933 %}
2934 
2935 instruct absF_reg(regF dst) %{
2936   predicate((UseSSE>=1) && (UseAVX == 0));
2937   match(Set dst (AbsF dst));
2938   ins_cost(150);
2939   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
2940   ins_encode %{
2941     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
2942   %}
2943   ins_pipe(pipe_slow);
2944 %}
2945 
2946 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
2947   predicate(UseAVX > 0);
2948   match(Set dst (AbsF src));
2949   ins_cost(150);
2950   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2951   ins_encode %{
2952     int vector_len = 0;
2953     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2954               ExternalAddress(float_signmask()), vector_len);
2955   %}
2956   ins_pipe(pipe_slow);
2957 %}
2958 
2959 instruct absD_reg(regD dst) %{
2960   predicate((UseSSE>=2) && (UseAVX == 0));
2961   match(Set dst (AbsD dst));
2962   ins_cost(150);
2963   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
2964             "# abs double by sign masking" %}
2965   ins_encode %{
2966     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
2967   %}
2968   ins_pipe(pipe_slow);
2969 %}
2970 
2971 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
2972   predicate(UseAVX > 0);
2973   match(Set dst (AbsD src));
2974   ins_cost(150);
2975   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
2976             "# abs double by sign masking" %}
2977   ins_encode %{
2978     int vector_len = 0;
2979     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2980               ExternalAddress(double_signmask()), vector_len);
2981   %}
2982   ins_pipe(pipe_slow);
2983 %}
2984 
2985 instruct negF_reg(regF dst) %{
2986   predicate((UseSSE>=1) && (UseAVX == 0));
2987   match(Set dst (NegF dst));
2988   ins_cost(150);
2989   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
2990   ins_encode %{
2991     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
2992   %}
2993   ins_pipe(pipe_slow);
2994 %}
2995 
2996 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
2997   predicate(UseAVX > 0);
2998   match(Set dst (NegF src));
2999   ins_cost(150);
3000   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
3001   ins_encode %{
3002     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
3003                  ExternalAddress(float_signflip()));
3004   %}
3005   ins_pipe(pipe_slow);
3006 %}
3007 
3008 instruct negD_reg(regD dst) %{
3009   predicate((UseSSE>=2) && (UseAVX == 0));
3010   match(Set dst (NegD dst));
3011   ins_cost(150);
3012   format %{ "xorpd   $dst, [0x8000000000000000]\t"
3013             "# neg double by sign flipping" %}
3014   ins_encode %{
3015     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
3016   %}
3017   ins_pipe(pipe_slow);
3018 %}
3019 
3020 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
3021   predicate(UseAVX > 0);
3022   match(Set dst (NegD src));
3023   ins_cost(150);
3024   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
3025             "# neg double by sign flipping" %}
3026   ins_encode %{
3027     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
3028                  ExternalAddress(double_signflip()));
3029   %}
3030   ins_pipe(pipe_slow);
3031 %}
3032 
3033 instruct sqrtF_reg(regF dst, regF src) %{
3034   predicate(UseSSE>=1);
3035   match(Set dst (SqrtF src));
3036 
3037   format %{ "sqrtss  $dst, $src" %}
3038   ins_cost(150);
3039   ins_encode %{
3040     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
3041   %}
3042   ins_pipe(pipe_slow);
3043 %}
3044 
3045 instruct sqrtF_mem(regF dst, memory src) %{
3046   predicate(UseSSE>=1);
3047   match(Set dst (SqrtF (LoadF src)));
3048 
3049   format %{ "sqrtss  $dst, $src" %}
3050   ins_cost(150);
3051   ins_encode %{
3052     __ sqrtss($dst$$XMMRegister, $src$$Address);
3053   %}
3054   ins_pipe(pipe_slow);
3055 %}
3056 
3057 instruct sqrtF_imm(regF dst, immF con) %{
3058   predicate(UseSSE>=1);
3059   match(Set dst (SqrtF con));
3060 
3061   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3062   ins_cost(150);
3063   ins_encode %{
3064     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
3065   %}
3066   ins_pipe(pipe_slow);
3067 %}
3068 
3069 instruct sqrtD_reg(regD dst, regD src) %{
3070   predicate(UseSSE>=2);
3071   match(Set dst (SqrtD src));
3072 
3073   format %{ "sqrtsd  $dst, $src" %}
3074   ins_cost(150);
3075   ins_encode %{
3076     __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
3077   %}
3078   ins_pipe(pipe_slow);
3079 %}
3080 
3081 instruct sqrtD_mem(regD dst, memory src) %{
3082   predicate(UseSSE>=2);
3083   match(Set dst (SqrtD (LoadD src)));
3084 
3085   format %{ "sqrtsd  $dst, $src" %}
3086   ins_cost(150);
3087   ins_encode %{
3088     __ sqrtsd($dst$$XMMRegister, $src$$Address);
3089   %}
3090   ins_pipe(pipe_slow);
3091 %}
3092 
3093 instruct sqrtD_imm(regD dst, immD con) %{
3094   predicate(UseSSE>=2);
3095   match(Set dst (SqrtD con));
3096   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3097   ins_cost(150);
3098   ins_encode %{
3099     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
3100   %}
3101   ins_pipe(pipe_slow);
3102 %}
3103 
3104 
3105 #ifdef _LP64
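// Note: the rmode constant is passed straight through as the x86
// rounding-control immediate of roundsd/vroundpd/vrndscalepd, where (assuming
// the ideal RoundDoubleMode constants follow the same encoding) 0 rounds to
// nearest even, 1 toward -infinity (floor), 2 toward +infinity (ceil) and
// 3 toward zero (truncate).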
3106 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
3107   match(Set dst (RoundDoubleMode src rmode));
3108   format %{ "roundsd $dst,$src" %}
3109   ins_cost(150);
3110   ins_encode %{
3111     assert(UseSSE >= 4, "required");
3112     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
3113   %}
3114   ins_pipe(pipe_slow);
3115 %}
3116 
3117 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{
3118   match(Set dst (RoundDoubleMode (LoadD src) rmode));
3119   format %{ "roundsd $dst,$src" %}
3120   ins_cost(150);
3121   ins_encode %{
3122     assert(UseSSE >= 4, "required");
3123     __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant);
3124   %}
3125   ins_pipe(pipe_slow);
3126 %}
3127 
3128 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{
3129   match(Set dst (RoundDoubleMode con rmode));
3130   effect(TEMP scratch_reg);
3131   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
3132   ins_cost(150);
3133   ins_encode %{
3134     assert(UseSSE >= 4, "required");
3135     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register);
3136   %}
3137   ins_pipe(pipe_slow);
3138 %}
3139 
3140 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
3141   predicate(n->as_Vector()->length() < 8);
3142   match(Set dst (RoundDoubleModeV src rmode));
3143   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
3144   ins_encode %{
3145     assert(UseAVX > 0, "required");
3146     int vector_len = vector_length_encoding(this);
3147     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
3148   %}
3149   ins_pipe( pipe_slow );
3150 %}
3151 
3152 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
3153   predicate(n->as_Vector()->length() == 8);
3154   match(Set dst (RoundDoubleModeV src rmode));
3155   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
3156   ins_encode %{
3157     assert(UseAVX > 2, "required");
3158     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
3159   %}
3160   ins_pipe( pipe_slow );
3161 %}
3162 
3163 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
3164   predicate(n->as_Vector()->length() < 8);
3165   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3166   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
3167   ins_encode %{
3168     assert(UseAVX > 0, "required");
3169     int vector_len = vector_length_encoding(this);
3170     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
3171   %}
3172   ins_pipe( pipe_slow );
3173 %}
3174 
3175 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
3176   predicate(n->as_Vector()->length() == 8);
3177   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3178   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
3179   ins_encode %{
3180     assert(UseAVX > 2, "required");
3181     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
3182   %}
3183   ins_pipe( pipe_slow );
3184 %}
3185 #endif // _LP64
3186 
3187 instruct onspinwait() %{
3188   match(OnSpinWait);
3189   ins_cost(200);
3190 
3191   format %{
3192     $$template
3193     $$emit$$"pause\t! membar_onspinwait"
3194   %}
3195   ins_encode %{
3196     __ pause();
3197   %}
3198   ins_pipe(pipe_slow);
3199 %}
3200 
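// The FMA rules below are guarded by UseFMA: a fused multiply-add computes
// a*b + c with a single rounding step, which is only acceptable when fused
// math has been explicitly requested (typically via the Math.fma intrinsics).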
3201 // a * b + c
3202 instruct fmaD_reg(regD a, regD b, regD c) %{
3203   predicate(UseFMA);
3204   match(Set c (FmaD  c (Binary a b)));
3205   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
3206   ins_cost(150);
3207   ins_encode %{
3208     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3209   %}
3210   ins_pipe( pipe_slow );
3211 %}
3212 
3213 // a * b + c
3214 instruct fmaF_reg(regF a, regF b, regF c) %{
3215   predicate(UseFMA);
3216   match(Set c (FmaF  c (Binary a b)));
3217   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
3218   ins_cost(150);
3219   ins_encode %{
3220     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3221   %}
3222   ins_pipe( pipe_slow );
3223 %}
3224 
3225 // ====================VECTOR INSTRUCTIONS=====================================
3226 
3227 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
3228 instruct MoveVec2Leg(legVec dst, vec src) %{
3229   match(Set dst src);
3230   format %{ "" %}
3231   ins_encode %{
3232     ShouldNotReachHere();
3233   %}
3234   ins_pipe( fpu_reg_reg );
3235 %}
3236 
3237 instruct MoveLeg2Vec(vec dst, legVec src) %{
3238   match(Set dst src);
3239   format %{ "" %}
3240   ins_encode %{
3241     ShouldNotReachHere();
3242   %}
3243   ins_pipe( fpu_reg_reg );
3244 %}
3245 
3246 // ============================================================================
3247 
3248 // Load vectors
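// The move instruction is chosen purely by the vector width in bytes:
// movdl (4), movq (8), movdqu (16), vmovdqu (32) or evmovdqul (64).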
3249 instruct loadV(vec dst, memory mem) %{
3250   match(Set dst (LoadVector mem));
3251   ins_cost(125);
3252   format %{ "load_vector $dst,$mem" %}
3253   ins_encode %{
3254     switch (vector_length_in_bytes(this)) {
3255       case  4: __ movdl    ($dst$$XMMRegister, $mem$$Address); break;
3256       case  8: __ movq     ($dst$$XMMRegister, $mem$$Address); break;
3257       case 16: __ movdqu   ($dst$$XMMRegister, $mem$$Address); break;
3258       case 32: __ vmovdqu  ($dst$$XMMRegister, $mem$$Address); break;
3259       case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break;
3260       default: ShouldNotReachHere();
3261     }
3262   %}
3263   ins_pipe( pipe_slow );
3264 %}
3265 
3266 // Store vectors generic operand pattern.
3267 instruct storeV(memory mem, vec src) %{
3268   match(Set mem (StoreVector mem src));
3269   ins_cost(145);
  format %{ "store_vector $mem,$src" %}
3271   ins_encode %{
3272     switch (vector_length_in_bytes(this, $src)) {
3273       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
3274       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
3275       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
3276       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
3277       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
3278       default: ShouldNotReachHere();
3279     }
3280   %}
3281   ins_pipe( pipe_slow );
3282 %}
3283 
3284 // ====================REPLICATE=======================================
3285 
3286 // Replicate byte scalar to be vector
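// With AVX512BW (plus AVX512VL for sub-512-bit operands) the broadcast is a
// single evpbroadcastb; otherwise the scalar is moved into the low dword and
// the byte is spread with punpcklbw/pshuflw, then widened with punpcklqdq and
// vinserti128_high as the vector length requires.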
3287 instruct ReplB_reg(vec dst, rRegI src) %{
3288   match(Set dst (ReplicateB src));
3289   format %{ "replicateB $dst,$src" %}
3290   ins_encode %{
3291     uint vlen = vector_length(this);
3292     if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
3293       assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
3294       int vlen_enc = vector_length_encoding(this);
3295       __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
3296     } else {
3297       __ movdl($dst$$XMMRegister, $src$$Register);
3298       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3299       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3300       if (vlen >= 16) {
3301         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3302         if (vlen >= 32) {
3303           assert(vlen == 32, "sanity");
3304           __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3305         }
3306       }
3307     }
3308   %}
3309   ins_pipe( pipe_slow );
3310 %}
3311 
3312 instruct ReplB_mem(vec dst, memory mem) %{
3313   predicate(VM_Version::supports_avx2());
3314   match(Set dst (ReplicateB (LoadB mem)));
3315   format %{ "replicateB $dst,$mem" %}
3316   ins_encode %{
3317     int vector_len = vector_length_encoding(this);
3318     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
3319   %}
3320   ins_pipe( pipe_slow );
3321 %}
3322 
3323 instruct ReplB_imm(vec dst, immI con) %{
3324   match(Set dst (ReplicateB con));
3325   format %{ "replicateB $dst,$con" %}
3326   ins_encode %{
3327     uint vlen = vector_length(this);
3328     InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1));
3329     if (vlen == 4) {
3330       __ movdl($dst$$XMMRegister, const_addr);
3331     } else {
3332       __ movq($dst$$XMMRegister, const_addr);
3333       if (vlen >= 16) {
3334         if (VM_Version::supports_avx2()) {
3335           int vlen_enc = vector_length_encoding(this);
3336           __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3337         } else {
3338           assert(vlen == 16, "sanity");
3339           __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3340         }
3341       }
3342     }
3343   %}
3344   ins_pipe( pipe_slow );
3345 %}
3346 
3347 // Replicate byte scalar zero to be vector
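// xor-with-self is the canonical zero idiom; vectors wider than 16 bytes use
// the VEX/EVEX-encoded vpxor so that the upper bits of the register are
// cleared as well.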
3348 instruct ReplB_zero(vec dst, immI0 zero) %{
3349   match(Set dst (ReplicateB zero));
3350   format %{ "replicateB $dst,$zero" %}
3351   ins_encode %{
3352     uint vlen = vector_length(this);
3353     if (vlen <= 16) {
3354       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3355     } else {
3356       // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ).
3357       int vlen_enc = vector_length_encoding(this);
3358       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3359     }
3360   %}
3361   ins_pipe( fpu_reg_reg );
3362 %}
3363 
3364 // ====================ReplicateS=======================================
3365 
3366 instruct ReplS_reg(vec dst, rRegI src) %{
3367   match(Set dst (ReplicateS src));
3368   format %{ "replicateS $dst,$src" %}
3369   ins_encode %{
3370     uint vlen = vector_length(this);
3371     if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
3372       assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
3373       int vlen_enc = vector_length_encoding(this);
3374       __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
3375     } else {
3376       __ movdl($dst$$XMMRegister, $src$$Register);
3377       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3378       if (vlen >= 8) {
3379         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3380         if (vlen >= 16) {
3381           assert(vlen == 16, "sanity");
3382           __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3383         }
3384       }
3385     }
3386   %}
3387   ins_pipe( pipe_slow );
3388 %}
3389 
3390 instruct ReplS_mem(vec dst, memory mem) %{
3391   predicate(VM_Version::supports_avx2());
3392   match(Set dst (ReplicateS (LoadS mem)));
3393   format %{ "replicateS $dst,$mem" %}
3394   ins_encode %{
3395     int vlen_enc = vector_length_encoding(this);
3396     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
3397   %}
3398   ins_pipe( pipe_slow );
3399 %}
3400 
3401 instruct ReplS_imm(vec dst, immI con) %{
3402   match(Set dst (ReplicateS con));
3403   format %{ "replicateS $dst,$con" %}
3404   ins_encode %{
3405     uint vlen = vector_length(this);
3406     InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2));
3407     if (vlen == 2) {
3408       __ movdl($dst$$XMMRegister, const_addr);
3409     } else {
3410       __ movq($dst$$XMMRegister, const_addr);
3411       if (vlen >= 8) {
3412         if (VM_Version::supports_avx2()) {
3413           int vlen_enc = vector_length_encoding(this);
3414           __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3415         } else {
3416           assert(vlen == 8, "sanity");
3417           __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3418         }
3419       }
3420     }
3421   %}
3422   ins_pipe( fpu_reg_reg );
3423 %}
3424 
3425 instruct ReplS_zero(vec dst, immI0 zero) %{
3426   match(Set dst (ReplicateS zero));
3427   format %{ "replicateS $dst,$zero" %}
3428   ins_encode %{
3429     uint vlen = vector_length(this);
3430     if (vlen <= 8) {
3431       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3432     } else {
3433       int vlen_enc = vector_length_encoding(this);
3434       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3435     }
3436   %}
3437   ins_pipe( fpu_reg_reg );
3438 %}
3439 
3440 // ====================ReplicateI=======================================
3441 
3442 instruct ReplI_reg(vec dst, rRegI src) %{
3443   match(Set dst (ReplicateI src));
3444   format %{ "replicateI $dst,$src" %}
3445   ins_encode %{
3446     uint vlen = vector_length(this);
3447     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
3448       int vlen_enc = vector_length_encoding(this);
3449       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
3450     } else {
3451       __ movdl($dst$$XMMRegister, $src$$Register);
3452       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3453       if (vlen >= 8) {
3454         assert(vlen == 8, "sanity");
3455         __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3456       }
3457     }
3458   %}
3459   ins_pipe( pipe_slow );
3460 %}
3461 
3462 instruct ReplI_mem(vec dst, memory mem) %{
3463   match(Set dst (ReplicateI (LoadI mem)));
3464   format %{ "replicateI $dst,$mem" %}
3465   ins_encode %{
3466     uint vlen = vector_length(this);
3467     if (vlen <= 4) {
3468       __ movdl($dst$$XMMRegister, $mem$$Address);
3469       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3470     } else {
3471       assert(VM_Version::supports_avx2(), "sanity");
3472       int vector_len = vector_length_encoding(this);
3473       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
3474     }
3475   %}
3476   ins_pipe( pipe_slow );
3477 %}
3478 
3479 instruct ReplI_imm(vec dst, immI con) %{
3480   match(Set dst (ReplicateI con));
3481   format %{ "replicateI $dst,$con" %}
3482   ins_encode %{
3483     uint vlen = vector_length(this);
3484     InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4));
3485     if (vlen <= 4) {
3486       __ movq($dst$$XMMRegister, const_addr);
3487       if (vlen == 4) {
3488         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3489       }
3490     } else {
3491       assert(VM_Version::supports_avx2(), "sanity");
3492       int vector_len = vector_length_encoding(this);
3493       __ movq($dst$$XMMRegister, const_addr);
3494       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3495     }
3496   %}
3497   ins_pipe( pipe_slow );
3498 %}
3499 
3500 // Replicate integer (4 byte) scalar zero to be vector
3501 instruct ReplI_zero(vec dst, immI0 zero) %{
3502   match(Set dst (ReplicateI zero));
3503   format %{ "replicateI $dst,$zero" %}
3504   ins_encode %{
3505     uint vlen = vector_length(this);
3506     if (vlen <= 4) {
3507       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3508     } else {
3509       int vlen_enc = vector_length_encoding(this);
3510       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3511     }
3512   %}
3513   ins_pipe( fpu_reg_reg );
3514 %}
3515 
3516 instruct ReplI_M1(vec dst, immI_M1 con) %{
3517   predicate(UseAVX > 0);
3518   match(Set dst (ReplicateB con));
3519   match(Set dst (ReplicateS con));
3520   match(Set dst (ReplicateI con));
3521   effect(TEMP dst);
3522   format %{ "vallones $dst" %}
3523   ins_encode %{
3524     int vector_len = vector_length_encoding(this);
3525     __ vallones($dst$$XMMRegister, vector_len);
3526   %}
3527   ins_pipe( pipe_slow );
3528 %}
3529 
3530 // ====================ReplicateL=======================================
3531 
3532 #ifdef _LP64
3533 // Replicate long (8 byte) scalar to be vector
3534 instruct ReplL_reg(vec dst, rRegL src) %{
3535   match(Set dst (ReplicateL src));
3536   format %{ "replicateL $dst,$src" %}
3537   ins_encode %{
3538     uint vlen = vector_length(this);
3539     if (vlen == 2) {
3540       __ movdq($dst$$XMMRegister, $src$$Register);
3541       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3542     } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
3543       int vlen_enc = vector_length_encoding(this);
3544       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
3545     } else {
3546       assert(vlen == 4, "sanity");
3547       __ movdq($dst$$XMMRegister, $src$$Register);
3548       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3549       __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3550     }
3551   %}
3552   ins_pipe( pipe_slow );
3553 %}
3554 #else // _LP64
3555 // Replicate long (8 byte) scalar to be vector
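// On 32-bit the long is held in a register pair, so the low and high halves
// are moved into the XMM register separately and merged with punpckldq before
// the 64-bit lane is broadcast.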
3556 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
3557   predicate(n->as_Vector()->length() <= 4);
3558   match(Set dst (ReplicateL src));
3559   effect(TEMP dst, USE src, TEMP tmp);
3560   format %{ "replicateL $dst,$src" %}
3561   ins_encode %{
3562     uint vlen = vector_length(this);
3563     if (vlen == 2) {
3564       __ movdl($dst$$XMMRegister, $src$$Register);
3565       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3566       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3567       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3568     } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
3569       int vector_len = Assembler::AVX_256bit;
3570       __ movdl($dst$$XMMRegister, $src$$Register);
3571       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3572       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3573       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3574     } else {
3575       __ movdl($dst$$XMMRegister, $src$$Register);
3576       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3577       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3578       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3579       __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3580     }
3581   %}
3582   ins_pipe( pipe_slow );
3583 %}
3584 
3585 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
3586   predicate(n->as_Vector()->length() == 8);
3587   match(Set dst (ReplicateL src));
3588   effect(TEMP dst, USE src, TEMP tmp);
3589   format %{ "replicateL $dst,$src" %}
3590   ins_encode %{
3591     if (VM_Version::supports_avx512vl()) {
3592       __ movdl($dst$$XMMRegister, $src$$Register);
3593       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3594       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3595       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3596       __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3597       __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
3598     } else {
3599       int vector_len = Assembler::AVX_512bit;
3600       __ movdl($dst$$XMMRegister, $src$$Register);
3601       __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3602       __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3603       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3604     }
3605   %}
3606   ins_pipe( pipe_slow );
3607 %}
3608 #endif // _LP64
3609 
3610 instruct ReplL_mem(vec dst, memory mem) %{
3611   match(Set dst (ReplicateL (LoadL mem)));
3612   format %{ "replicateL $dst,$mem" %}
3613   ins_encode %{
3614     uint vlen = vector_length(this);
3615     if (vlen == 2) {
3616       __ movq($dst$$XMMRegister, $mem$$Address);
3617       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3618     } else {
3619       assert(VM_Version::supports_avx2(), "sanity");
3620       int vlen_enc = vector_length_encoding(this);
3621       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
3622     }
3623   %}
3624   ins_pipe( pipe_slow );
3625 %}
3626 
3627 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
3628 instruct ReplL_imm(vec dst, immL con) %{
3629   match(Set dst (ReplicateL con));
3630   format %{ "replicateL $dst,$con" %}
3631   ins_encode %{
3632     uint vlen = vector_length(this);
3633     InternalAddress const_addr = $constantaddress($con);
3634     if (vlen == 2) {
3635       __ movq($dst$$XMMRegister, const_addr);
3636       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3637     } else {
3638       assert(VM_Version::supports_avx2(), "sanity");
3639       int vlen_enc = vector_length_encoding(this);
3640       __ movq($dst$$XMMRegister, const_addr);
3641       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3642     }
3643   %}
3644   ins_pipe( pipe_slow );
3645 %}
3646 
3647 instruct ReplL_zero(vec dst, immL0 zero) %{
3648   match(Set dst (ReplicateL zero));
3649   format %{ "replicateL $dst,$zero" %}
3650   ins_encode %{
3651     int vlen = vector_length(this);
3652     if (vlen == 2) {
3653       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3654     } else {
3655       int vlen_enc = vector_length_encoding(this);
3656       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3657     }
3658   %}
3659   ins_pipe( fpu_reg_reg );
3660 %}
3661 
3662 instruct ReplL_M1(vec dst, immL_M1 con) %{
3663   predicate(UseAVX > 0);
3664   match(Set dst (ReplicateL con));
3665   effect(TEMP dst);
3666   format %{ "vallones $dst" %}
3667   ins_encode %{
3668     int vector_len = vector_length_encoding(this);
3669     __ vallones($dst$$XMMRegister, vector_len);
3670   %}
3671   ins_pipe( pipe_slow );
3672 %}
3673 
3674 // ====================ReplicateF=======================================
3675 
3676 instruct ReplF_reg(vec dst, vlRegF src) %{
3677   match(Set dst (ReplicateF src));
3678   format %{ "replicateF $dst,$src" %}
3679   ins_encode %{
3680     uint vlen = vector_length(this);
3681     if (vlen <= 4) {
3682       __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    } else if (VM_Version::supports_avx2()) {
3684       int vector_len = vector_length_encoding(this);
3685       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
3686     } else {
3687       assert(vlen == 8, "sanity");
3688       __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3689       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
3690     }
3691   %}
3692   ins_pipe( pipe_slow );
3693 %}
3694 
3695 instruct ReplF_mem(vec dst, memory mem) %{
3696   match(Set dst (ReplicateF (LoadF mem)));
3697   format %{ "replicateF $dst,$mem" %}
3698   ins_encode %{
3699     uint vlen = vector_length(this);
3700     if (vlen <= 4) {
3701       __ movdl($dst$$XMMRegister, $mem$$Address);
3702       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3703     } else {
3704       assert(VM_Version::supports_avx(), "sanity");
3705       int vector_len = vector_length_encoding(this);
3706       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
3707     }
3708   %}
3709   ins_pipe( pipe_slow );
3710 %}
3711 
3712 instruct ReplF_zero(vec dst, immF0 zero) %{
3713   match(Set dst (ReplicateF zero));
3714   format %{ "replicateF $dst,$zero" %}
3715   ins_encode %{
3716     uint vlen = vector_length(this);
3717     if (vlen <= 4) {
3718       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3719     } else {
3720       int vlen_enc = vector_length_encoding(this);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ
3722     }
3723   %}
3724   ins_pipe( fpu_reg_reg );
3725 %}
3726 
3727 // ====================ReplicateD=======================================
3728 
3729 // Replicate double (8 bytes) scalar to be vector
3730 instruct ReplD_reg(vec dst, vlRegD src) %{
3731   match(Set dst (ReplicateD src));
3732   format %{ "replicateD $dst,$src" %}
3733   ins_encode %{
3734     uint vlen = vector_length(this);
3735     if (vlen == 2) {
3736       __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3737     } else if (VM_Version::supports_avx2()) {
3738       int vector_len = vector_length_encoding(this);
3739       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
3740     } else {
3741       assert(vlen == 4, "sanity");
3742       __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3743       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
3744     }
3745   %}
3746   ins_pipe( pipe_slow );
3747 %}
3748 
3749 instruct ReplD_mem(vec dst, memory mem) %{
3750   match(Set dst (ReplicateD (LoadD mem)));
3751   format %{ "replicateD $dst,$mem" %}
3752   ins_encode %{
3753     uint vlen = vector_length(this);
3754     if (vlen == 2) {
3755       __ movq($dst$$XMMRegister, $mem$$Address);
3756       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44);
3757     } else {
3758       assert(VM_Version::supports_avx(), "sanity");
3759       int vector_len = vector_length_encoding(this);
3760       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
3761     }
3762   %}
3763   ins_pipe( pipe_slow );
3764 %}
3765 
3766 instruct ReplD_zero(vec dst, immD0 zero) %{
3767   match(Set dst (ReplicateD zero));
3768   format %{ "replicateD $dst,$zero" %}
3769   ins_encode %{
3770     uint vlen = vector_length(this);
3771     if (vlen == 2) {
3772       __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
3773     } else {
3774       int vlen_enc = vector_length_encoding(this);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorpd requires AVX512DQ
3776     }
3777   %}
3778   ins_pipe( fpu_reg_reg );
3779 %}
3780 
3781 // ====================REDUCTION ARITHMETIC=======================================
3782 // =======================Int Reduction==========================================
3783 
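// In the rules below, src1 is the scalar input folded into the reduction and
// src2 is the vector whose lanes are reduced; reduceI/reduceL combine the
// lanes and src1 according to the ideal opcode (add, mul, and, or, xor) and
// leave the scalar result in dst.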
3784 instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
3785   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
3786             n->in(2)->bottom_type()->is_vect()->length() < 16);
3787   match(Set dst (AddReductionVI src1 src2));
3788   match(Set dst (MulReductionVI src1 src2));
3789   match(Set dst (AndReductionV  src1 src2));
3790   match(Set dst ( OrReductionV  src1 src2));
3791   match(Set dst (XorReductionV  src1 src2));
3792   effect(TEMP vtmp1, TEMP vtmp2);
3793   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
3794   ins_encode %{
3795     int opcode = this->ideal_Opcode();
3796     int vlen = vector_length(this, $src2);
3797     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3798   %}
3799   ins_pipe( pipe_slow );
3800 %}
3801 
3802 instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
3803   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
3804             n->in(2)->bottom_type()->is_vect()->length() == 16);
3805   match(Set dst (AddReductionVI src1 src2));
3806   match(Set dst (MulReductionVI src1 src2));
3807   match(Set dst (AndReductionV  src1 src2));
3808   match(Set dst ( OrReductionV  src1 src2));
3809   match(Set dst (XorReductionV  src1 src2));
3810   effect(TEMP vtmp1, TEMP vtmp2);
3811   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
3812   ins_encode %{
3813     int opcode = this->ideal_Opcode();
3814     int vlen = vector_length(this, $src2);
3815     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3816   %}
3817   ins_pipe( pipe_slow );
3818 %}
3819 
3820 // =======================Long Reduction==========================================
3821 
3822 #ifdef _LP64
3823 instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
3824   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
3825             n->in(2)->bottom_type()->is_vect()->length() < 8);
3826   match(Set dst (AddReductionVL src1 src2));
3827   match(Set dst (MulReductionVL src1 src2));
3828   match(Set dst (AndReductionV  src1 src2));
3829   match(Set dst ( OrReductionV  src1 src2));
3830   match(Set dst (XorReductionV  src1 src2));
3831   effect(TEMP vtmp1, TEMP vtmp2);
3832   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
3833   ins_encode %{
3834     int opcode = this->ideal_Opcode();
3835     int vlen = vector_length(this, $src2);
3836     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3837   %}
3838   ins_pipe( pipe_slow );
3839 %}
3840 
3841 instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
3842   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
3843             n->in(2)->bottom_type()->is_vect()->length() == 8);
3844   match(Set dst (AddReductionVL src1 src2));
3845   match(Set dst (MulReductionVL src1 src2));
3846   match(Set dst (AndReductionV  src1 src2));
3847   match(Set dst ( OrReductionV  src1 src2));
3848   match(Set dst (XorReductionV  src1 src2));
3849   effect(TEMP vtmp1, TEMP vtmp2);
3850   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
3851   ins_encode %{
3852     int opcode = this->ideal_Opcode();
3853     int vlen = vector_length(this, $src2);
3854     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3855   %}
3856   ins_pipe( pipe_slow );
3857 %}
3858 #endif // _LP64
3859 
3860 // =======================Float Reduction==========================================
3861 
3862 instruct reductionF128(regF dst, vec src, vec vtmp) %{
3863   predicate(n->in(2)->bottom_type()->is_vect()->length() <= 4);
3864   match(Set dst (AddReductionVF dst src));
3865   match(Set dst (MulReductionVF dst src));
3866   effect(TEMP dst, TEMP vtmp);
3867   format %{ "vector_reduction_fp  $dst,$src ; using $vtmp as TEMP" %}
3868   ins_encode %{
3869     int opcode = this->ideal_Opcode();
3870     int vlen = vector_length(this, $src);
3871     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
3872   %}
3873   ins_pipe( pipe_slow );
3874 %}
3875 
3876 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
3877   predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
3878   match(Set dst (AddReductionVF dst src));
3879   match(Set dst (MulReductionVF dst src));
3880   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
3881   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
3882   ins_encode %{
3883     int opcode = this->ideal_Opcode();
3884     int vlen = vector_length(this, $src);
3885     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3886   %}
3887   ins_pipe( pipe_slow );
3888 %}
3889 
3890 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
3891   predicate(n->in(2)->bottom_type()->is_vect()->length() == 16);
3892   match(Set dst (AddReductionVF dst src));
3893   match(Set dst (MulReductionVF dst src));
3894   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
3895   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
3896   ins_encode %{
3897     int opcode = this->ideal_Opcode();
3898     int vlen = vector_length(this, $src);
3899     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3900   %}
3901   ins_pipe( pipe_slow );
3902 %}
3903 
3904 // =======================Double Reduction==========================================
3905 
3906 instruct reduction2D(regD dst, vec src, vec vtmp) %{
3907   predicate(n->in(2)->bottom_type()->is_vect()->length() == 2);
3908   match(Set dst (AddReductionVD dst src));
3909   match(Set dst (MulReductionVD dst src));
3910   effect(TEMP dst, TEMP vtmp);
3911   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
3912   ins_encode %{
3913     int opcode = this->ideal_Opcode();
3914     int vlen = vector_length(this, $src);
3915     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
3916   %}
3917   ins_pipe( pipe_slow );
3918 %}
3919 
3920 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
3921   predicate(n->in(2)->bottom_type()->is_vect()->length() == 4);
3922   match(Set dst (AddReductionVD dst src));
3923   match(Set dst (MulReductionVD dst src));
3924   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
3925   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
3926   ins_encode %{
3927     int opcode = this->ideal_Opcode();
3928     int vlen = vector_length(this, $src);
3929     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3930   %}
3931   ins_pipe( pipe_slow );
3932 %}
3933 
3934 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
3935   predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
3936   match(Set dst (AddReductionVD dst src));
3937   match(Set dst (MulReductionVD dst src));
3938   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
3939   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
3940   ins_encode %{
3941     int opcode = this->ideal_Opcode();
3942     int vlen = vector_length(this, $src);
3943     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
3944   %}
3945   ins_pipe( pipe_slow );
3946 %}
3947 
3948 // ====================VECTOR ARITHMETIC=======================================
3949 
3950 // --------------------------------- ADD --------------------------------------
3951 
3952 // Bytes vector add
3953 instruct vaddB(vec dst, vec src) %{
3954   predicate(UseAVX == 0);
3955   match(Set dst (AddVB dst src));
3956   format %{ "paddb   $dst,$src\t! add packedB" %}
3957   ins_encode %{
3958     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
3959   %}
3960   ins_pipe( pipe_slow );
3961 %}
3962 
3963 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
3964   predicate(UseAVX > 0);
3965   match(Set dst (AddVB src1 src2));
3966   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
3967   ins_encode %{
3968     int vector_len = vector_length_encoding(this);
3969     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
3970   %}
3971   ins_pipe( pipe_slow );
3972 %}
3973 
3974 instruct vaddB_mem(vec dst, vec src, memory mem) %{
3975   predicate(UseAVX > 0);
3976   match(Set dst (AddVB src (LoadVector mem)));
3977   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
3978   ins_encode %{
3979     int vector_len = vector_length_encoding(this);
3980     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
3981   %}
3982   ins_pipe( pipe_slow );
3983 %}
3984 
3985 // Shorts/Chars vector add
3986 instruct vaddS(vec dst, vec src) %{
3987   predicate(UseAVX == 0);
3988   match(Set dst (AddVS dst src));
3989   format %{ "paddw   $dst,$src\t! add packedS" %}
3990   ins_encode %{
3991     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
3992   %}
3993   ins_pipe( pipe_slow );
3994 %}
3995 
3996 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
3997   predicate(UseAVX > 0);
3998   match(Set dst (AddVS src1 src2));
3999   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
4000   ins_encode %{
4001     int vector_len = vector_length_encoding(this);
4002     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4003   %}
4004   ins_pipe( pipe_slow );
4005 %}
4006 
4007 instruct vaddS_mem(vec dst, vec src, memory mem) %{
4008   predicate(UseAVX > 0);
4009   match(Set dst (AddVS src (LoadVector mem)));
4010   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
4011   ins_encode %{
4012     int vector_len = vector_length_encoding(this);
4013     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4014   %}
4015   ins_pipe( pipe_slow );
4016 %}
4017 
4018 // Integers vector add
4019 instruct vaddI(vec dst, vec src) %{
4020   predicate(UseAVX == 0);
4021   match(Set dst (AddVI dst src));
4022   format %{ "paddd   $dst,$src\t! add packedI" %}
4023   ins_encode %{
4024     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
4025   %}
4026   ins_pipe( pipe_slow );
4027 %}
4028 
4029 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
4030   predicate(UseAVX > 0);
4031   match(Set dst (AddVI src1 src2));
4032   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
4033   ins_encode %{
4034     int vector_len = vector_length_encoding(this);
4035     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4036   %}
4037   ins_pipe( pipe_slow );
4038 %}
4039 
4040 
4041 instruct vaddI_mem(vec dst, vec src, memory mem) %{
4042   predicate(UseAVX > 0);
4043   match(Set dst (AddVI src (LoadVector mem)));
4044   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
4045   ins_encode %{
4046     int vector_len = vector_length_encoding(this);
4047     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4048   %}
4049   ins_pipe( pipe_slow );
4050 %}
4051 
4052 // Longs vector add
4053 instruct vaddL(vec dst, vec src) %{
4054   predicate(UseAVX == 0);
4055   match(Set dst (AddVL dst src));
4056   format %{ "paddq   $dst,$src\t! add packedL" %}
4057   ins_encode %{
4058     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
4059   %}
4060   ins_pipe( pipe_slow );
4061 %}
4062 
4063 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
4064   predicate(UseAVX > 0);
4065   match(Set dst (AddVL src1 src2));
4066   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
4067   ins_encode %{
4068     int vector_len = vector_length_encoding(this);
4069     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4070   %}
4071   ins_pipe( pipe_slow );
4072 %}
4073 
4074 instruct vaddL_mem(vec dst, vec src, memory mem) %{
4075   predicate(UseAVX > 0);
4076   match(Set dst (AddVL src (LoadVector mem)));
4077   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
4078   ins_encode %{
4079     int vector_len = vector_length_encoding(this);
4080     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4081   %}
4082   ins_pipe( pipe_slow );
4083 %}
4084 
4085 // Floats vector add
4086 instruct vaddF(vec dst, vec src) %{
4087   predicate(UseAVX == 0);
4088   match(Set dst (AddVF dst src));
4089   format %{ "addps   $dst,$src\t! add packedF" %}
4090   ins_encode %{
4091     __ addps($dst$$XMMRegister, $src$$XMMRegister);
4092   %}
4093   ins_pipe( pipe_slow );
4094 %}
4095 
4096 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
4097   predicate(UseAVX > 0);
4098   match(Set dst (AddVF src1 src2));
4099   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
4100   ins_encode %{
4101     int vector_len = vector_length_encoding(this);
4102     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4103   %}
4104   ins_pipe( pipe_slow );
4105 %}
4106 
4107 instruct vaddF_mem(vec dst, vec src, memory mem) %{
4108   predicate(UseAVX > 0);
4109   match(Set dst (AddVF src (LoadVector mem)));
4110   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
4111   ins_encode %{
4112     int vector_len = vector_length_encoding(this);
4113     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4114   %}
4115   ins_pipe( pipe_slow );
4116 %}
4117 
4118 // Doubles vector add
4119 instruct vaddD(vec dst, vec src) %{
4120   predicate(UseAVX == 0);
4121   match(Set dst (AddVD dst src));
4122   format %{ "addpd   $dst,$src\t! add packedD" %}
4123   ins_encode %{
4124     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
4125   %}
4126   ins_pipe( pipe_slow );
4127 %}
4128 
4129 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
4130   predicate(UseAVX > 0);
4131   match(Set dst (AddVD src1 src2));
4132   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
4133   ins_encode %{
4134     int vector_len = vector_length_encoding(this);
4135     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4136   %}
4137   ins_pipe( pipe_slow );
4138 %}
4139 
4140 instruct vaddD_mem(vec dst, vec src, memory mem) %{
4141   predicate(UseAVX > 0);
4142   match(Set dst (AddVD src (LoadVector mem)));
4143   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
4144   ins_encode %{
4145     int vector_len = vector_length_encoding(this);
4146     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4147   %}
4148   ins_pipe( pipe_slow );
4149 %}
4150 
4151 // --------------------------------- SUB --------------------------------------
4152 
4153 // Bytes vector sub
4154 instruct vsubB(vec dst, vec src) %{
4155   predicate(UseAVX == 0);
4156   match(Set dst (SubVB dst src));
4157   format %{ "psubb   $dst,$src\t! sub packedB" %}
4158   ins_encode %{
4159     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
4160   %}
4161   ins_pipe( pipe_slow );
4162 %}
4163 
4164 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
4165   predicate(UseAVX > 0);
4166   match(Set dst (SubVB src1 src2));
4167   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
4168   ins_encode %{
4169     int vector_len = vector_length_encoding(this);
4170     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4171   %}
4172   ins_pipe( pipe_slow );
4173 %}
4174 
4175 instruct vsubB_mem(vec dst, vec src, memory mem) %{
4176   predicate(UseAVX > 0);
4177   match(Set dst (SubVB src (LoadVector mem)));
4178   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
4179   ins_encode %{
4180     int vector_len = vector_length_encoding(this);
4181     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4182   %}
4183   ins_pipe( pipe_slow );
4184 %}
4185 
4186 // Shorts/Chars vector sub
4187 instruct vsubS(vec dst, vec src) %{
4188   predicate(UseAVX == 0);
4189   match(Set dst (SubVS dst src));
4190   format %{ "psubw   $dst,$src\t! sub packedS" %}
4191   ins_encode %{
4192     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
4193   %}
4194   ins_pipe( pipe_slow );
4195 %}
4196 
4197 
4198 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
4199   predicate(UseAVX > 0);
4200   match(Set dst (SubVS src1 src2));
4201   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
4202   ins_encode %{
4203     int vector_len = vector_length_encoding(this);
4204     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4205   %}
4206   ins_pipe( pipe_slow );
4207 %}
4208 
4209 instruct vsubS_mem(vec dst, vec src, memory mem) %{
4210   predicate(UseAVX > 0);
4211   match(Set dst (SubVS src (LoadVector mem)));
4212   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
4213   ins_encode %{
4214     int vector_len = vector_length_encoding(this);
4215     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4216   %}
4217   ins_pipe( pipe_slow );
4218 %}
4219 
4220 // Integers vector sub
4221 instruct vsubI(vec dst, vec src) %{
4222   predicate(UseAVX == 0);
4223   match(Set dst (SubVI dst src));
4224   format %{ "psubd   $dst,$src\t! sub packedI" %}
4225   ins_encode %{
4226     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
4227   %}
4228   ins_pipe( pipe_slow );
4229 %}
4230 
4231 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
4232   predicate(UseAVX > 0);
4233   match(Set dst (SubVI src1 src2));
4234   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
4235   ins_encode %{
4236     int vector_len = vector_length_encoding(this);
4237     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4238   %}
4239   ins_pipe( pipe_slow );
4240 %}
4241 
4242 instruct vsubI_mem(vec dst, vec src, memory mem) %{
4243   predicate(UseAVX > 0);
4244   match(Set dst (SubVI src (LoadVector mem)));
4245   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
4246   ins_encode %{
4247     int vector_len = vector_length_encoding(this);
4248     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4249   %}
4250   ins_pipe( pipe_slow );
4251 %}
4252 
4253 // Longs vector sub
4254 instruct vsubL(vec dst, vec src) %{
4255   predicate(UseAVX == 0);
4256   match(Set dst (SubVL dst src));
4257   format %{ "psubq   $dst,$src\t! sub packedL" %}
4258   ins_encode %{
4259     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
4260   %}
4261   ins_pipe( pipe_slow );
4262 %}
4263 
4264 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
4265   predicate(UseAVX > 0);
4266   match(Set dst (SubVL src1 src2));
4267   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
4268   ins_encode %{
4269     int vector_len = vector_length_encoding(this);
4270     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4271   %}
4272   ins_pipe( pipe_slow );
4273 %}
4274 
4275 
4276 instruct vsubL_mem(vec dst, vec src, memory mem) %{
4277   predicate(UseAVX > 0);
4278   match(Set dst (SubVL src (LoadVector mem)));
4279   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
4280   ins_encode %{
4281     int vector_len = vector_length_encoding(this);
4282     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4283   %}
4284   ins_pipe( pipe_slow );
4285 %}
4286 
4287 // Floats vector sub
4288 instruct vsubF(vec dst, vec src) %{
4289   predicate(UseAVX == 0);
4290   match(Set dst (SubVF dst src));
4291   format %{ "subps   $dst,$src\t! sub packedF" %}
4292   ins_encode %{
4293     __ subps($dst$$XMMRegister, $src$$XMMRegister);
4294   %}
4295   ins_pipe( pipe_slow );
4296 %}
4297 
4298 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
4299   predicate(UseAVX > 0);
4300   match(Set dst (SubVF src1 src2));
4301   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
4302   ins_encode %{
4303     int vector_len = vector_length_encoding(this);
4304     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4305   %}
4306   ins_pipe( pipe_slow );
4307 %}
4308 
4309 instruct vsubF_mem(vec dst, vec src, memory mem) %{
4310   predicate(UseAVX > 0);
4311   match(Set dst (SubVF src (LoadVector mem)));
4312   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
4313   ins_encode %{
4314     int vector_len = vector_length_encoding(this);
4315     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4316   %}
4317   ins_pipe( pipe_slow );
4318 %}
4319 
4320 // Doubles vector sub
4321 instruct vsubD(vec dst, vec src) %{
4322   predicate(UseAVX == 0);
4323   match(Set dst (SubVD dst src));
4324   format %{ "subpd   $dst,$src\t! sub packedD" %}
4325   ins_encode %{
4326     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
4327   %}
4328   ins_pipe( pipe_slow );
4329 %}
4330 
4331 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
4332   predicate(UseAVX > 0);
4333   match(Set dst (SubVD src1 src2));
4334   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
4335   ins_encode %{
4336     int vector_len = vector_length_encoding(this);
4337     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4338   %}
4339   ins_pipe( pipe_slow );
4340 %}
4341 
4342 instruct vsubD_mem(vec dst, vec src, memory mem) %{
4343   predicate(UseAVX > 0);
4344   match(Set dst (SubVD src (LoadVector mem)));
4345   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
4346   ins_encode %{
4347     int vector_len = vector_length_encoding(this);
4348     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4349   %}
4350   ins_pipe( pipe_slow );
4351 %}
4352 
4353 // --------------------------------- MUL --------------------------------------
4354 
4355 // Byte vector mul
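// x86 has no packed byte multiply, so the bytes are sign-extended to words
// (pmovsxbw), multiplied as words (pmullw), then masked back to the low byte
// of each word (vector_short_to_byte_mask) and re-packed with packuswb.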
4356 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
4357   predicate(n->as_Vector()->length() == 4 ||
4358             n->as_Vector()->length() == 8);
4359   match(Set dst (MulVB src1 src2));
4360   effect(TEMP dst, TEMP tmp, TEMP scratch);
4361   format %{"vector_mulB $dst,$src1,$src2" %}
4362   ins_encode %{
4363     assert(UseSSE > 3, "required");
4364     __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
4365     __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
4366     __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
4367     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4368     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
4369     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
4370   %}
4371   ins_pipe( pipe_slow );
4372 %}
4373 
4374 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
4375   predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
4376   match(Set dst (MulVB src1 src2));
4377   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
4378   format %{"vector_mulB $dst,$src1,$src2" %}
4379   ins_encode %{
4380     assert(UseSSE > 3, "required");
4381     __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister);
4382     __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
4383     __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister);
4384     __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE);
4385     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE);
4386     __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister);
4387     __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
4388     __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister);
4389     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4390     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
4391     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
4392     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
4393   %}
4394   ins_pipe( pipe_slow );
4395 %}
4396 
4397 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
4398   predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
4399   match(Set dst (MulVB src1 src2));
4400   effect(TEMP dst, TEMP tmp, TEMP scratch);
4401   format %{"vector_mulB $dst,$src1,$src2" %}
4402   ins_encode %{
    int vector_len = Assembler::AVX_256bit;
4404     __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
4405     __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
4406     __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len);
4407     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4408     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
4409     __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
4410     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
4411   %}
4412   ins_pipe( pipe_slow );
4413 %}
4414 
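// For 256- and 512-bit vectors the low and high halves are widened and
// multiplied separately. Since vpackuswb packs within 128-bit lanes, a
// cross-lane permute (vpermq / vector_byte_perm_mask) restores element order.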
4415 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
4416   predicate(n->as_Vector()->length() == 32);
4417   match(Set dst (MulVB src1 src2));
4418   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
4419   format %{"vector_mulB $dst,$src1,$src2" %}
4420   ins_encode %{
4421     assert(UseAVX > 1, "required");
4422     int vector_len = Assembler::AVX_256bit;
4423     __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
4424     __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
4425     __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
4426     __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4427     __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4428     __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
4429     __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
4430     __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4431     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4432     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4433     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4434     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4435     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len);
4436     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
4437   %}
4438   ins_pipe( pipe_slow );
4439 %}
4440 
4441 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
4442   predicate(n->as_Vector()->length() == 64);
4443   match(Set dst (MulVB src1 src2));
4444   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
4445   format %{"vector_mulB $dst,$src1,$src2\n\t" %}
4446   ins_encode %{
4447     assert(UseAVX > 2, "required");
4448     int vector_len = Assembler::AVX_512bit;
4449     __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
4450     __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
4451     __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
4452     __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4453     __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4454     __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
4455     __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
4456     __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4457     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4458     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4459     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4460     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4461     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4462     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
4463     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4464   %}
4465   ins_pipe( pipe_slow );
4466 %}
4467 
4468 // Shorts/Chars vector mul
4469 instruct vmulS(vec dst, vec src) %{
4470   predicate(UseAVX == 0);
4471   match(Set dst (MulVS dst src));
4472   format %{ "pmullw $dst,$src\t! mul packedS" %}
4473   ins_encode %{
4474     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
4475   %}
4476   ins_pipe( pipe_slow );
4477 %}
4478 
4479 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
4480   predicate(UseAVX > 0);
4481   match(Set dst (MulVS src1 src2));
4482   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
4483   ins_encode %{
4484     int vector_len = vector_length_encoding(this);
4485     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4486   %}
4487   ins_pipe( pipe_slow );
4488 %}
4489 
4490 instruct vmulS_mem(vec dst, vec src, memory mem) %{
4491   predicate(UseAVX > 0);
4492   match(Set dst (MulVS src (LoadVector mem)));
4493   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
4494   ins_encode %{
4495     int vector_len = vector_length_encoding(this);
4496     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4497   %}
4498   ins_pipe( pipe_slow );
4499 %}
4500 
4501 // Integers vector mul
4502 instruct vmulI(vec dst, vec src) %{
4503   predicate(UseAVX == 0);
4504   match(Set dst (MulVI dst src));
4505   format %{ "pmulld  $dst,$src\t! mul packedI" %}
4506   ins_encode %{
4507     assert(UseSSE > 3, "required");
4508     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
4509   %}
4510   ins_pipe( pipe_slow );
4511 %}
4512 
4513 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
4514   predicate(UseAVX > 0);
4515   match(Set dst (MulVI src1 src2));
4516   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
4517   ins_encode %{
4518     int vector_len = vector_length_encoding(this);
4519     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4520   %}
4521   ins_pipe( pipe_slow );
4522 %}
4523 
4524 instruct vmulI_mem(vec dst, vec src, memory mem) %{
4525   predicate(UseAVX > 0);
4526   match(Set dst (MulVI src (LoadVector mem)));
4527   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
4528   ins_encode %{
4529     int vector_len = vector_length_encoding(this);
4530     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4531   %}
4532   ins_pipe( pipe_slow );
4533 %}
4534 
4535 // Longs vector mul
4536 instruct vmulL_reg(vec dst, vec src1, vec src2) %{
4537   match(Set dst (MulVL src1 src2));
4538   format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %}
4539   ins_encode %{
4540     assert(UseAVX > 2, "required");
4541     int vector_len = vector_length_encoding(this);
4542     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4543   %}
4544   ins_pipe( pipe_slow );
4545 %}
4546 
4547 instruct vmulL_mem(vec dst, vec src, memory mem) %{
4548   match(Set dst (MulVL src (LoadVector mem)));
4549   format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %}
4550   ins_encode %{
4551     assert(UseAVX > 2, "required");
4552     int vector_len = vector_length_encoding(this);
4553     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4554   %}
4555   ins_pipe( pipe_slow );
4556 %}
4557 
4558 // Floats vector mul
4559 instruct vmulF(vec dst, vec src) %{
4560   predicate(UseAVX == 0);
4561   match(Set dst (MulVF dst src));
4562   format %{ "mulps   $dst,$src\t! mul packedF" %}
4563   ins_encode %{
4564     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
4565   %}
4566   ins_pipe( pipe_slow );
4567 %}
4568 
4569 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
4570   predicate(UseAVX > 0);
4571   match(Set dst (MulVF src1 src2));
4572   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
4573   ins_encode %{
4574     int vector_len = vector_length_encoding(this);
4575     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4576   %}
4577   ins_pipe( pipe_slow );
4578 %}
4579 
4580 instruct vmulF_mem(vec dst, vec src, memory mem) %{
4581   predicate(UseAVX > 0);
4582   match(Set dst (MulVF src (LoadVector mem)));
4583   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
4584   ins_encode %{
4585     int vector_len = vector_length_encoding(this);
4586     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4587   %}
4588   ins_pipe( pipe_slow );
4589 %}
4590 
4591 // Doubles vector mul
4592 instruct vmulD(vec dst, vec src) %{
4593   predicate(UseAVX == 0);
4594   match(Set dst (MulVD dst src));
4595   format %{ "mulpd   $dst,$src\t! mul packedD" %}
4596   ins_encode %{
4597     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
4598   %}
4599   ins_pipe( pipe_slow );
4600 %}
4601 
4602 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
4603   predicate(UseAVX > 0);
4604   match(Set dst (MulVD src1 src2));
4605   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
4606   ins_encode %{
4607     int vector_len = vector_length_encoding(this);
4608     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4609   %}
4610   ins_pipe( pipe_slow );
4611 %}
4612 
4613 instruct vmulD_mem(vec dst, vec src, memory mem) %{
4614   predicate(UseAVX > 0);
4615   match(Set dst (MulVD src (LoadVector mem)));
4616   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
4617   ins_encode %{
4618     int vector_len = vector_length_encoding(this);
4619     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4620   %}
4621   ins_pipe( pipe_slow );
4622 %}
4623 
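// Vector conditional move: the packed compare fills $dst with a per-lane
// all-ones/all-zeros mask, which blendv then uses to select between the
// $src1 and $src2 lanes.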
4624 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
4625   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4626   match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
4627   effect(TEMP dst, USE src1, USE src2);
4628   format %{ "cmpps.$copnd  $dst, $src1, $src2  ! vcmovevf, cond=$cop\n\t"
4629             "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
4630          %}
4631   ins_encode %{
    int vector_len = Assembler::AVX_256bit;
4633     int cond = (Assembler::Condition)($copnd$$cmpcode);
4634     __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
4635     __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
4636   %}
4637   ins_pipe( pipe_slow );
4638 %}
4639 
4640 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
4641   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4642   match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
4643   effect(TEMP dst, USE src1, USE src2);
4644   format %{ "cmppd.$copnd  $dst, $src1, $src2  ! vcmovevd, cond=$cop\n\t"
4645             "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
4646          %}
4647   ins_encode %{
    int vector_len = Assembler::AVX_256bit;
4649     int cond = (Assembler::Condition)($copnd$$cmpcode);
4650     __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
4651     __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
4652   %}
4653   ins_pipe( pipe_slow );
4654 %}
4655 
4656 // --------------------------------- DIV --------------------------------------
4657 
4658 // Floats vector div
4659 instruct vdivF(vec dst, vec src) %{
4660   predicate(UseAVX == 0);
4661   match(Set dst (DivVF dst src));
4662   format %{ "divps   $dst,$src\t! div packedF" %}
4663   ins_encode %{
4664     __ divps($dst$$XMMRegister, $src$$XMMRegister);
4665   %}
4666   ins_pipe( pipe_slow );
4667 %}
4668 
4669 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
4670   predicate(UseAVX > 0);
4671   match(Set dst (DivVF src1 src2));
4672   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
4673   ins_encode %{
4674     int vector_len = vector_length_encoding(this);
4675     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4676   %}
4677   ins_pipe( pipe_slow );
4678 %}
4679 
4680 instruct vdivF_mem(vec dst, vec src, memory mem) %{
4681   predicate(UseAVX > 0);
4682   match(Set dst (DivVF src (LoadVector mem)));
4683   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
4684   ins_encode %{
4685     int vector_len = vector_length_encoding(this);
4686     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4687   %}
4688   ins_pipe( pipe_slow );
4689 %}
4690 
4691 // Doubles vector div
4692 instruct vdivD(vec dst, vec src) %{
4693   predicate(UseAVX == 0);
4694   match(Set dst (DivVD dst src));
4695   format %{ "divpd   $dst,$src\t! div packedD" %}
4696   ins_encode %{
4697     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
4698   %}
4699   ins_pipe( pipe_slow );
4700 %}
4701 
4702 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
4703   predicate(UseAVX > 0);
4704   match(Set dst (DivVD src1 src2));
4705   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
4706   ins_encode %{
4707     int vector_len = vector_length_encoding(this);
4708     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4709   %}
4710   ins_pipe( pipe_slow );
4711 %}
4712 
4713 instruct vdivD_mem(vec dst, vec src, memory mem) %{
4714   predicate(UseAVX > 0);
4715   match(Set dst (DivVD src (LoadVector mem)));
4716   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
4717   ins_encode %{
4718     int vector_len = vector_length_encoding(this);
4719     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
4720   %}
4721   ins_pipe( pipe_slow );
4722 %}
4723 
4724 // --------------------------------- Sqrt --------------------------------------
4725 
4726 instruct vsqrtF_reg(vec dst, vec src) %{
4727   match(Set dst (SqrtVF src));
4728   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
4729   ins_encode %{
4730     assert(UseAVX > 0, "required");
4731     int vector_len = vector_length_encoding(this);
4732     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4733   %}
4734   ins_pipe( pipe_slow );
4735 %}
4736 
4737 instruct vsqrtF_mem(vec dst, memory mem) %{
4738   match(Set dst (SqrtVF (LoadVector mem)));
4739   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
4740   ins_encode %{
4741     assert(UseAVX > 0, "required");
4742     int vector_len = vector_length_encoding(this);
4743     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
4744   %}
4745   ins_pipe( pipe_slow );
4746 %}
4747 
// Doubles vector sqrt
4749 instruct vsqrtD_reg(vec dst, vec src) %{
4750   match(Set dst (SqrtVD src));
4751   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
4752   ins_encode %{
4753     assert(UseAVX > 0, "required");
4754     int vector_len = vector_length_encoding(this);
4755     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
4756   %}
4757   ins_pipe( pipe_slow );
4758 %}
4759 
4760 instruct vsqrtD_mem(vec dst, memory mem) %{
4761   match(Set dst (SqrtVD (LoadVector mem)));
4762   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
4763   ins_encode %{
4764     assert(UseAVX > 0, "required");
4765     int vector_len = vector_length_encoding(this);
4766     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
4767   %}
4768   ins_pipe( pipe_slow );
4769 %}
4770 
4771 // ------------------------------ Shift ---------------------------------------
4772 
4773 // Left and right shift count vectors are the same on x86
4774 // (only lowest bits of xmm reg are used for count).
4775 instruct vshiftcnt(vec dst, rRegI cnt) %{
4776   match(Set dst (LShiftCntV cnt));
4777   match(Set dst (RShiftCntV cnt));
4778   format %{ "movdl    $dst,$cnt\t! load shift count" %}
4779   ins_encode %{
4780     __ movdl($dst$$XMMRegister, $cnt$$Register);
4781   %}
4782   ins_pipe( pipe_slow );
4783 %}
4784 
4785 // Byte vector shift
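// As with byte multiply, there is no packed byte shift instruction, so the
// bytes are widened to words (vextendbw), shifted as words, masked back to
// a byte and re-packed.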
4786 instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
4787   predicate(n->as_Vector()->length() <= 8);
4788   match(Set dst (LShiftVB src shift));
4789   match(Set dst (RShiftVB src shift));
4790   match(Set dst (URShiftVB src shift));
4791   effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
4792   format %{"vector_byte_shift $dst,$src,$shift" %}
4793   ins_encode %{
4794     assert(UseSSE > 3, "required");
4795     int opcode = this->ideal_Opcode();
4796     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
4797     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
4798     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4799     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
4800     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
4801   %}
4802   ins_pipe( pipe_slow );
4803 %}
4804 
4805 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
4806   predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
4807   match(Set dst (LShiftVB src shift));
4808   match(Set dst (RShiftVB src shift));
4809   match(Set dst (URShiftVB src shift));
4810   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
4811   format %{"vector_byte_shift $dst,$src,$shift" %}
4812   ins_encode %{
4813     assert(UseSSE > 3, "required");
4814     int opcode = this->ideal_Opcode();
4815 
4816     __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
4817     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
4818     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
4819     __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
4820     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
4821     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4822     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
4823     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
4824     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
4825   %}
4826   ins_pipe( pipe_slow );
4827 %}
4828 
4829 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
4830   predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
4831   match(Set dst (LShiftVB src shift));
4832   match(Set dst (RShiftVB src shift));
4833   match(Set dst (URShiftVB src shift));
4834   effect(TEMP dst, TEMP tmp, TEMP scratch);
4835   format %{"vector_byte_shift $dst,$src,$shift" %}
4836   ins_encode %{
4837     int opcode = this->ideal_Opcode();
4838     int vector_len = Assembler::AVX_256bit;
4839     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
4840     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
4841     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
4842     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
4843     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
4844   %}
4845   ins_pipe( pipe_slow );
4846 %}
4847 
4848 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
4849   predicate(n->as_Vector()->length() == 32);
4850   match(Set dst (LShiftVB src shift));
4851   match(Set dst (RShiftVB src shift));
4852   match(Set dst (URShiftVB src shift));
4853   effect(TEMP dst, TEMP tmp, TEMP scratch);
4854   format %{"vector_byte_shift $dst,$src,$shift" %}
4855   ins_encode %{
4856     assert(UseAVX > 1, "required");
4857     int opcode = this->ideal_Opcode();
4858     int vector_len = Assembler::AVX_256bit;
4859     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
4860     __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
4861     __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
4862     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
4863     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
4864     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
4865     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
4866     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
4867     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
4868   %}
4869   ins_pipe( pipe_slow );
4870 %}
4871 
4872 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
4873   predicate(n->as_Vector()->length() == 64);
4874   match(Set dst (LShiftVB src shift));
4875   match(Set dst (RShiftVB src shift));
4876   match(Set dst (URShiftVB src shift));
4877   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
4878   format %{"vector_byte_shift $dst,$src,$shift" %}
4879   ins_encode %{
4880     assert(UseAVX > 2, "required");
4881     int opcode = this->ideal_Opcode();
4882     int vector_len = Assembler::AVX_512bit;
4883     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
4884     __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
4885     __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
4886     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
4887     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
4888     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
4889     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4890     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
4891     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4892     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4893     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
4894     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
4895   %}
4896   ins_pipe( pipe_slow );
4897 %}
4898 
// A vector logical right shift of shorts would produce an incorrect Java
// result for negative data, because Java code converts a short value to an
// int with sign extension before shifting. Char vectors are fine, since
// chars are unsigned values.
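// For example, for short s = -1, Java evaluates (s >>> 1) as ((int)s) >>> 1,
// which is 0x7FFFFFFF, whereas a 16-bit logical shift of the lane would give
// 0x7FFF. For char c = 0xFFFF both evaluations give 0x7FFF.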
// Shorts/Chars vector shift
4904 instruct vshiftS(vec dst, vec src, vec shift) %{
4905   match(Set dst (LShiftVS src shift));
4906   match(Set dst (RShiftVS src shift));
4907   match(Set dst (URShiftVS src shift));
4908   effect(TEMP dst, USE src, USE shift);
4909   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
4910   ins_encode %{
4911     int opcode = this->ideal_Opcode();
4912     if (UseAVX > 0) {
4913       int vlen_enc = vector_length_encoding(this);
4914       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
4915     } else {
4916       int vlen = vector_length(this);
4917       if (vlen == 2) {
4918         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
4919         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4920       } else if (vlen == 4) {
4921         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
4922         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4923       } else {
4924         assert (vlen == 8, "sanity");
4925         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
4926         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4927       }
4928     }
4929   %}
4930   ins_pipe( pipe_slow );
4931 %}
4932 
// Integers vector shift
4934 instruct vshiftI(vec dst, vec src, vec shift) %{
4935   match(Set dst (LShiftVI src shift));
4936   match(Set dst (RShiftVI src shift));
4937   match(Set dst (URShiftVI src shift));
4938   effect(TEMP dst, USE src, USE shift);
4939   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
4940   ins_encode %{
4941     int opcode = this->ideal_Opcode();
4942     if (UseAVX > 0) {
4943       int vector_len = vector_length_encoding(this);
4944       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
4945     } else {
4946       int vlen = vector_length(this);
4947       if (vlen == 2) {
4948         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
4949         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4950       } else {
4951         assert(vlen == 4, "sanity");
4952         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
4953         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
4954       }
4955     }
4956   %}
4957   ins_pipe( pipe_slow );
4958 %}
4959 
// Integers vector constant shift
4961 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
4962   match(Set dst (LShiftVI src (LShiftCntV shift)));
4963   match(Set dst (RShiftVI src (RShiftCntV shift)));
4964   match(Set dst (URShiftVI src (RShiftCntV shift)));
4965   effect(TEMP dst, USE src);
4966   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
4967   ins_encode %{
4968     int opcode = this->ideal_Opcode();
4969     if (UseAVX > 0) {
4970       int vector_len = vector_length_encoding(this);
4971       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
4972     } else {
4973       int vlen = vector_length(this);
4974       if (vlen == 2) {
4975         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
4976         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
4977       } else {
4978         assert(vlen == 4, "sanity");
4979         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
4980         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
4981       }
4982     }
4983   %}
4984   ins_pipe( pipe_slow );
4985 %}
4986 
4987 // Longs vector shift
4988 instruct vshiftL(vec dst, vec src, vec shift) %{
4989   match(Set dst (LShiftVL src shift));
4990   match(Set dst (URShiftVL src shift));
4991   effect(TEMP dst, USE src, USE shift);
4992   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
4993   ins_encode %{
4994     int opcode = this->ideal_Opcode();
4995     if (UseAVX > 0) {
4996       int vector_len = vector_length_encoding(this);
4997       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
4998     } else {
4999       assert(vector_length(this) == 2, "");
5000       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
5001       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
5002     }
5003   %}
5004   ins_pipe( pipe_slow );
5005 %}
5006 
5007 // Longs vector constant shift
5008 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
5009   match(Set dst (LShiftVL src (LShiftCntV shift)));
5010   match(Set dst (URShiftVL src (RShiftCntV shift)));
5011   effect(TEMP dst, USE src, USE shift);
5012   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
5013   ins_encode %{
5014     int opcode = this->ideal_Opcode();
5015     if (UseAVX > 0) {
5016       int vector_len = vector_length_encoding(this);
5017       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
5018     } else {
5019       assert(vector_length(this) == 2, "");
5020       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
5021       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
5022     }
5023   %}
5024   ins_pipe( pipe_slow );
5025 %}
5026 
5027 // -------------------ArithmeticRightShift -----------------------------------
5028 // Long vector arithmetic right shift
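// With AVX-512 (see below) evpsraq does this directly; otherwise the shift is
// emulated as ((x >>> s) ^ m) - m, where m = (0x8000000000000000 >>> s)
// re-creates the shifted-in sign bits.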
5029 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
5030   predicate(UseAVX <= 2);
5031   match(Set dst (RShiftVL src shift));
5032   effect(TEMP dst, TEMP tmp, TEMP scratch);
5033   format %{ "vshiftq $dst,$src,$shift" %}
5034   ins_encode %{
5035     uint vlen = vector_length(this);
5036     if (vlen == 2) {
5037       assert(UseSSE >= 2, "required");
5038       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
5039       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
5040       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
5041       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
5042       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
5043       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
5044     } else {
5045       assert(vlen == 4, "sanity");
5046       assert(UseAVX > 1, "required");
5047       int vector_len = Assembler::AVX_256bit;
5048       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
5049       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
5050       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
5051       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
5052       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
5053     }
5054   %}
5055   ins_pipe( pipe_slow );
5056 %}
5057 
5058 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
5059   predicate(UseAVX > 2);
5060   match(Set dst (RShiftVL src shift));
5061   format %{ "vshiftq $dst,$src,$shift" %}
5062   ins_encode %{
5063     int vector_len = vector_length_encoding(this);
5064     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
5065   %}
5066   ins_pipe( pipe_slow );
5067 %}
5068 
5069 // --------------------------------- AND --------------------------------------
5070 
5071 instruct vand(vec dst, vec src) %{
5072   predicate(UseAVX == 0);
5073   match(Set dst (AndV dst src));
5074   format %{ "pand    $dst,$src\t! and vectors" %}
5075   ins_encode %{
5076     __ pand($dst$$XMMRegister, $src$$XMMRegister);
5077   %}
5078   ins_pipe( pipe_slow );
5079 %}
5080 
5081 instruct vand_reg(vec dst, vec src1, vec src2) %{
5082   predicate(UseAVX > 0);
5083   match(Set dst (AndV src1 src2));
5084   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
5085   ins_encode %{
5086     int vector_len = vector_length_encoding(this);
5087     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5088   %}
5089   ins_pipe( pipe_slow );
5090 %}
5091 
5092 instruct vand_mem(vec dst, vec src, memory mem) %{
5093   predicate(UseAVX > 0);
5094   match(Set dst (AndV src (LoadVector mem)));
5095   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
5096   ins_encode %{
5097     int vector_len = vector_length_encoding(this);
5098     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5099   %}
5100   ins_pipe( pipe_slow );
5101 %}
5102 
5103 // --------------------------------- OR ---------------------------------------
5104 
5105 instruct vor(vec dst, vec src) %{
5106   predicate(UseAVX == 0);
5107   match(Set dst (OrV dst src));
5108   format %{ "por     $dst,$src\t! or vectors" %}
5109   ins_encode %{
5110     __ por($dst$$XMMRegister, $src$$XMMRegister);
5111   %}
5112   ins_pipe( pipe_slow );
5113 %}
5114 
5115 instruct vor_reg(vec dst, vec src1, vec src2) %{
5116   predicate(UseAVX > 0);
5117   match(Set dst (OrV src1 src2));
5118   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
5119   ins_encode %{
5120     int vector_len = vector_length_encoding(this);
5121     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5122   %}
5123   ins_pipe( pipe_slow );
5124 %}
5125 
5126 instruct vor_mem(vec dst, vec src, memory mem) %{
5127   predicate(UseAVX > 0);
5128   match(Set dst (OrV src (LoadVector mem)));
5129   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
5130   ins_encode %{
5131     int vector_len = vector_length_encoding(this);
5132     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5133   %}
5134   ins_pipe( pipe_slow );
5135 %}
5136 
5137 // --------------------------------- XOR --------------------------------------
5138 
5139 instruct vxor(vec dst, vec src) %{
5140   predicate(UseAVX == 0);
5141   match(Set dst (XorV dst src));
5142   format %{ "pxor    $dst,$src\t! xor vectors" %}
5143   ins_encode %{
5144     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5145   %}
5146   ins_pipe( pipe_slow );
5147 %}
5148 
5149 instruct vxor_reg(vec dst, vec src1, vec src2) %{
5150   predicate(UseAVX > 0);
5151   match(Set dst (XorV src1 src2));
5152   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
5153   ins_encode %{
5154     int vector_len = vector_length_encoding(this);
5155     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5156   %}
5157   ins_pipe( pipe_slow );
5158 %}
5159 
5160 instruct vxor_mem(vec dst, vec src, memory mem) %{
5161   predicate(UseAVX > 0);
5162   match(Set dst (XorV src (LoadVector mem)));
5163   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
5164   ins_encode %{
5165     int vector_len = vector_length_encoding(this);
5166     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5167   %}
5168   ins_pipe( pipe_slow );
5169 %}
5170 
5171 // --------------------------------- ABS --------------------------------------
5172 // a = |a|
5173 instruct vabsB_reg(vec dst, vec src) %{
5174   match(Set dst (AbsVB  src));
5175   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
5176   ins_encode %{
5177     uint vlen = vector_length(this);
5178     if (vlen <= 16) {
5179       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
5180     } else {
5181       int vlen_enc = vector_length_encoding(this);
5182       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
5183     }
5184   %}
5185   ins_pipe( pipe_slow );
5186 %}
5187 
5188 instruct vabsS_reg(vec dst, vec src) %{
5189   match(Set dst (AbsVS  src));
5190   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
5191   ins_encode %{
5192     uint vlen = vector_length(this);
5193     if (vlen <= 8) {
5194       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
5195     } else {
5196       int vlen_enc = vector_length_encoding(this);
5197       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
5198     }
5199   %}
5200   ins_pipe( pipe_slow );
5201 %}
5202 
5203 instruct vabsI_reg(vec dst, vec src) %{
5204   match(Set dst (AbsVI  src));
5205   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
5206   ins_encode %{
5207     uint vlen = vector_length(this);
5208     if (vlen <= 4) {
5209       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
5210     } else {
5211       int vlen_enc = vector_length_encoding(this);
5212       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
5213     }
5214   %}
5215   ins_pipe( pipe_slow );
5216 %}
5217 
5218 instruct vabsL_reg(vec dst, vec src) %{
5219   match(Set dst (AbsVL  src));
5220   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
5221   ins_encode %{
5222     assert(UseAVX > 2, "required");
5223     int vector_len = vector_length_encoding(this);
5224     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
5225   %}
5226   ins_pipe( pipe_slow );
5227 %}
5228 
5229 // --------------------------------- ABSNEG --------------------------------------
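// Float/double abs and neg are implemented with a bitmask loaded from memory
// ($scratch holds its address): the sign bit of each lane is cleared for abs
// and flipped for neg.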
5230 
5231 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
5232   predicate(n->as_Vector()->length() != 4); // handled by 1-operand instruction vabsneg4F
5233   match(Set dst (AbsVF src));
5234   match(Set dst (NegVF src));
5235   effect(TEMP scratch);
5236   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
5237   ins_cost(150);
5238   ins_encode %{
5239     int opcode = this->ideal_Opcode();
5240     int vlen = vector_length(this);
5241     if (vlen == 2) {
5242       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
5243     } else {
5244       assert(vlen == 8 || vlen == 16, "required");
5245       int vlen_enc = vector_length_encoding(this);
5246       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
5247     }
5248   %}
5249   ins_pipe( pipe_slow );
5250 %}
5251 
instruct vabsneg4F(vec dst, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src, rRegI scratch) %{
  match(Set dst (AbsVD  src));
  match(Set dst (NegVD  src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c
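// The c operand is both the accumulator input and the result, and the multiply
// and add are fused with a single rounding (vfmadd231-style encodings).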

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF  c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVF  c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD  c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVD  c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------
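// MulAddVS2VI maps to pmaddwd/vpmaddwd: adjacent pairs of signed 16-bit
// elements are multiplied and each pair of 32-bit products is summed into one
// packed 32-bit result.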

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------
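// With AVX-512 VNNI the multiply-add and the following accumulate are fused
// into a single vpdpwssd, which adds the pair products directly into dst; the
// low ins_cost below steers the matcher toward this fused form.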

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vector_len = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------
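// Per-32-bit-lane population count. vpopcntd comes from the AVX512_VPOPCNTDQ
// extension; its availability is expected to be checked by the matcher, while
// the rule itself only asserts that UsePopCountInstruction is enabled.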

instruct vpopcountI(vec dst, vec src) %{
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd  $dst,$src\t! vector popcount packedI" %}
  ins_encode %{
    assert(UsePopCountInstruction, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Bitwise Ternary Logic ----------------------------------
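// vpternlogd evaluates an arbitrary three-input boolean function of dst, src2
// and src3: the 8-bit immediate $func is the truth table, indexed by the
// corresponding bits of the three inputs.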

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Rotation Operations ----------------------------------
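// Vector rotate left/right by an immediate count or by per-element counts in a
// vector register. The element type passed to the macro assembler selects the
// doubleword or quadword rotate form.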
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode      = this->ideal_Opcode();
    int vector_len  = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprotate_var(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode      = this->ideal_Opcode();
    int vector_len  = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
