src/cpu/x86/vm/x86.ad

  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // XMM registers.  256-bit registers or 8 words each, labeled (a)-h.
  63 // Word a in each register holds a Float, words ab hold a Double.
  64 // The whole registers are used in SSE4.2 version intrinsics,
  65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  66 // UseXMMForArrayCopy and UseSuperword flags).
  67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).




  68 // Linux ABI:   No register preserved across function calls
  69 //              XMM0-XMM7 might hold parameters
  70 // Windows ABI: XMM6-XMM15 preserved across function calls
  71 //              XMM0-XMM3 might hold parameters
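// Accordingly, the reg_defs below mark XMM6-XMM15 as SOE (callee-saved) on
// Win64, while every XMM register is SOC on the other platforms.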
  72 
  73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  81 
  82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
  88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
  89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
  90 
  91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
  92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
  93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
  94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
  95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
  96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
  97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
  98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
  99 
 100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
 101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
 102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
 103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
 104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
 105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
 106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
 107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
 108 
 109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
 110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
 111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
 112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
 113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
 114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
 115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
 116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
 117 
 118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
 119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
 120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
 121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
 122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
 123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
 124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
 125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
 126 
 127 #ifdef _WIN64
 128 
 129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
 130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 137 
 138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
 139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 146 
 147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
 148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 155 
 156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
 157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 164 
 165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 173 
 174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 182 
 183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 191 
 192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 200 
 201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 209 
 210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 218 
 219 #else // _WIN64
 220 
 221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
 222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 229 
 230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
 231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 238 
 239 #ifdef _LP64
 240 
 241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
 242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 249 
 250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
 251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 258 
 259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 267 
 268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 276 
 277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 285 
 286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 294 
 295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 303 
 304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 312 
 313 #endif // _LP64
 314 
 315 #endif // _WIN64
 316 
 317 #ifdef _LP64
 318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 319 #else
 320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
 321 #endif // _LP64
 322 
 323 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 324                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 325                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 326                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 327                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 328                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 329                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 330                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
 331 #ifdef _LP64
 332                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 333                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 334                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 335                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 336                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 337                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 338                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 339                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
 340 #endif
 341                    );
 342 
 343 // flags allocation class should be last.
 344 alloc_class chunk2(RFLAGS);
 345 
 346 // Singleton class for condition codes
 347 reg_class int_flags(RFLAGS);
 348 
 349 // Class for all float registers
 350 reg_class float_reg(XMM0,
 351                     XMM1,
 352                     XMM2,
 353                     XMM3,
 354                     XMM4,
 355                     XMM5,
 356                     XMM6,
 357                     XMM7
 358 #ifdef _LP64
 359                    ,XMM8,
 360                     XMM9,
 361                     XMM10,
 362                     XMM11,
 363                     XMM12,
 364                     XMM13,
 365                     XMM14,
 366                     XMM15
 367 #endif
 368                     );
 369 
 370 // Class for all double registers
 371 reg_class double_reg(XMM0,  XMM0b,
 372                      XMM1,  XMM1b,
 373                      XMM2,  XMM2b,
 374                      XMM3,  XMM3b,
 375                      XMM4,  XMM4b,
 376                      XMM5,  XMM5b,
 377                      XMM6,  XMM6b,
 378                      XMM7,  XMM7b
 379 #ifdef _LP64
 380                     ,XMM8,  XMM8b,
 381                      XMM9,  XMM9b,
 382                      XMM10, XMM10b,
 383                      XMM11, XMM11b,
 384                      XMM12, XMM12b,
 385                      XMM13, XMM13b,
 386                      XMM14, XMM14b,
 387                      XMM15, XMM15b
 388 #endif
 389                      );
 390 
 391 // Class for all 32bit vector registers
 392 reg_class vectors_reg(XMM0,
 393                       XMM1,
 394                       XMM2,
 395                       XMM3,
 396                       XMM4,
 397                       XMM5,
 398                       XMM6,
 399                       XMM7
 400 #ifdef _LP64
 401                      ,XMM8,
 402                       XMM9,
 403                       XMM10,
 404                       XMM11,
 405                       XMM12,
 406                       XMM13,
 407                       XMM14,
 408                       XMM15
 409 #endif
 410                       );
 411 
 412 // Class for all 64bit vector registers
 413 reg_class vectord_reg(XMM0,  XMM0b,
 414                       XMM1,  XMM1b,
 415                       XMM2,  XMM2b,
 416                       XMM3,  XMM3b,
 417                       XMM4,  XMM4b,
 418                       XMM5,  XMM5b,
 419                       XMM6,  XMM6b,
 420                       XMM7,  XMM7b
 421 #ifdef _LP64
 422                      ,XMM8,  XMM8b,
 423                       XMM9,  XMM9b,
 424                       XMM10, XMM10b,
 425                       XMM11, XMM11b,
 426                       XMM12, XMM12b,
 427                       XMM13, XMM13b,
 428                       XMM14, XMM14b,
 429                       XMM15, XMM15b
 430 #endif
 431                       );
 432 
 433 // Class for all 128bit vector registers
 434 reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
 435                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 436                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 437                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 438                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 439                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 440                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 441                       XMM7,  XMM7b,  XMM7c,  XMM7d
 442 #ifdef _LP64
 443                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
 444                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 445                       XMM10, XMM10b, XMM10c, XMM10d,
 446                       XMM11, XMM11b, XMM11c, XMM11d,
 447                       XMM12, XMM12b, XMM12c, XMM12d,
 448                       XMM13, XMM13b, XMM13c, XMM13d,
 449                       XMM14, XMM14b, XMM14c, XMM14d,
 450                       XMM15, XMM15b, XMM15c, XMM15d
 451 #endif
 452                       );
 453 
 454 // Class for all 256bit vector registers
 455 reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 456                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 457                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 458                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 459                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 460                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 461                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 462                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
 463 #ifdef _LP64
 464                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 465                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 466                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 467                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 468                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 469                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 470                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 471                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
 472 #endif
 473                       );
 474 
 475 %}
 476 
 477 
 478 //----------SOURCE BLOCK-------------------------------------------------------
 479 // This is a block of C++ code which provides values, functions, and
 480 // definitions necessary in the rest of the architecture description
 481 
 482 source_hpp %{
 483 // Header information of the source block.
 484 // Method declarations/definitions which are used outside
 485 // the ad-scope can conveniently be defined here.
 486 //
 487 // To keep related declarations/definitions/uses close together,
 488 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 489 
 490 class NativeJump;
 491 
 492 class CallStubImpl {
 493 
 494   //--------------------------------------------------------------


 606   static address float_signflip()  { return (address)float_signflip_pool; }
 607   static address double_signmask() { return (address)double_signmask_pool; }
 608   static address double_signflip() { return (address)double_signflip_pool; }
 609 #endif
 610 
 611 
 612 const bool Matcher::match_rule_supported(int opcode) {
 613   if (!has_match_rule(opcode))
 614     return false;
 615 
 616   switch (opcode) {
 617     case Op_PopCountI:
 618     case Op_PopCountL:
 619       if (!UsePopCountInstruction)
 620         return false;
 621     break;
 622     case Op_MulVI:
 623       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
 624         return false;
 625     break;
 626     case Op_AddReductionVL:
 627       if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
 628         return false;
 629     case Op_AddReductionVI:
 630       if (UseSSE < 3) // requires at least SSE3
 631         return false;
 632     case Op_MulReductionVI:
 633       if (UseSSE < 4) // requires at least SSE4
 634         return false;
 635     case Op_AddReductionVF:
 636     case Op_AddReductionVD:
 637     case Op_MulReductionVF:
 638     case Op_MulReductionVD:
 639       if (UseSSE < 1) // requires at least SSE
 640         return false;
 641     break;
 642     case Op_CompareAndSwapL:
 643 #ifdef _LP64
 644     case Op_CompareAndSwapP:
 645 #endif
 646       if (!VM_Version::supports_cx8())
 647         return false;
 648     break;
 649   }
 650 
  651   return true;  // By default, match rules are supported.
 652 }
 653 
 654 // Max vector size in bytes. 0 if not supported.
 655 const int Matcher::vector_width_in_bytes(BasicType bt) {
 656   assert(is_java_primitive(bt), "only primitive type vectors");
 657   if (UseSSE < 2) return 0;
 658   // SSE2 supports 128bit vectors for all types.
 659   // AVX2 supports 256bit vectors for all types.
 660   int size = (UseAVX > 1) ? 32 : 16;

 661   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 662   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 663     size = 32;
 664   // Use flag to limit vector size.
 665   size = MIN2(size,(int)MaxVectorSize);
 666   // Minimum 2 values in vector (or 4 for bytes).
 667   switch (bt) {
 668   case T_DOUBLE:
 669   case T_LONG:
 670     if (size < 16) return 0;
 671   case T_FLOAT:
 672   case T_INT:
 673     if (size < 8) return 0;
 674   case T_BOOLEAN:
 675   case T_BYTE:
 676   case T_CHAR:
 677   case T_SHORT:
 678     if (size < 4) return 0;
 679     break;
 680   default:
 681     ShouldNotReachHere();
 682   }
 683   return size;
  684 }
 685 
 686 // Limits on vector size (number of elements) loaded into vector.
 687 const int Matcher::max_vector_size(const BasicType bt) {
 688   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 689 }
 690 const int Matcher::min_vector_size(const BasicType bt) {
 691   int max_size = max_vector_size(bt);
 692   // Min size which can be loaded into vector is 4 bytes.
 693   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 694   return MIN2(size,max_size);
 695 }
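// For example, with UseAVX > 1 and MaxVectorSize >= 32, vector_width_in_bytes(T_INT)
// returns 32, so max_vector_size(T_INT) is 8 elements and min_vector_size(T_INT) is 2.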
 696 
  697 // Vector ideal reg corresponding to specified size in bytes
 698 const int Matcher::vector_ideal_reg(int size) {
 699   assert(MaxVectorSize >= size, "");
 700   switch(size) {
 701     case  4: return Op_VecS;
 702     case  8: return Op_VecD;
 703     case 16: return Op_VecX;
 704     case 32: return Op_VecY;

 705   }
 706   ShouldNotReachHere();
 707   return 0;
 708 }
 709 
 710 // Only lowest bits of xmm reg are used for vector shift count.
 711 const int Matcher::vector_shift_count_ideal_reg(int size) {
 712   return Op_VecS;
 713 }
 714 
 715 // x86 supports misaligned vectors store/load.
 716 const bool Matcher::misaligned_vectors_ok() {
 717   return !AlignVector; // can be changed by flag
 718 }
 719 
 720 // x86 AES instructions are compatible with SunJCE expanded
 721 // keys, hence we do not need to pass the original key to stubs
 722 const bool Matcher::pass_original_key_for_aes() {
 723   return false;
 724 }


 728                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 729   // In 64-bit VM size calculation is very complex. Emitting instructions
 730   // into scratch buffer is used to get size in 64-bit VM.
 731   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
 732   assert(ireg == Op_VecS || // 32bit vector
 733          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 734          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
 735          "no non-adjacent vector moves" );
 736   if (cbuf) {
 737     MacroAssembler _masm(cbuf);
 738     int offset = __ offset();
 739     switch (ireg) {
 740     case Op_VecS: // copy whole register
 741     case Op_VecD:
 742     case Op_VecX:
 743       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 744       break;
 745     case Op_VecY:
 746       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 747       break;



 748     default:
 749       ShouldNotReachHere();
 750     }
 751     int size = __ offset() - offset;
 752 #ifdef ASSERT
 753     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  754     assert(!do_size || size == 4, "incorrect size calculation");
 755 #endif
 756     return size;
 757 #ifndef PRODUCT
 758   } else if (!do_size) {
 759     switch (ireg) {
 760     case Op_VecS:
 761     case Op_VecD:
 762     case Op_VecX:
 763       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 764       break;
 765     case Op_VecY:

 766       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 767       break;
 768     default:
 769       ShouldNotReachHere();
 770     }
 771 #endif
 772   }
 773   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 774   return 4;
 775 }
 776 
 777 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 778                             int stack_offset, int reg, uint ireg, outputStream* st) {
 779   // In 64-bit VM size calculation is very complex. Emitting instructions
 780   // into scratch buffer is used to get size in 64-bit VM.
 781   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
 782   if (cbuf) {
 783     MacroAssembler _masm(cbuf);
 784     int offset = __ offset();
 785     if (is_load) {
 786       switch (ireg) {
 787       case Op_VecS:
 788         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 789         break;
 790       case Op_VecD:
 791         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 792         break;
 793       case Op_VecX:
 794         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 795         break;
 796       case Op_VecY:
 797         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 798         break;



 799       default:
 800         ShouldNotReachHere();
 801       }
 802     } else { // store
 803       switch (ireg) {
 804       case Op_VecS:
 805         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 806         break;
 807       case Op_VecD:
 808         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 809         break;
 810       case Op_VecX:
 811         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 812         break;
 813       case Op_VecY:
 814         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 815         break;



 816       default:
 817         ShouldNotReachHere();
 818       }
 819     }
 820     int size = __ offset() - offset;
 821 #ifdef ASSERT
 822     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
 823     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  824     assert(!do_size || size == (5+offset_size), "incorrect size calculation");
 825 #endif
 826     return size;
 827 #ifndef PRODUCT
 828   } else if (!do_size) {
 829     if (is_load) {
 830       switch (ireg) {
 831       case Op_VecS:
 832         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 833         break;
 834       case Op_VecD:
 835         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 836         break;
 837        case Op_VecX:
 838         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 839         break;
 840       case Op_VecY:

 841         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 842         break;
 843       default:
 844         ShouldNotReachHere();
 845       }
 846     } else { // store
 847       switch (ireg) {
 848       case Op_VecS:
 849         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 850         break;
 851       case Op_VecD:
 852         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 853         break;
 854        case Op_VecX:
 855         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 856         break;
 857       case Op_VecY:

 858         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 859         break;
 860       default:
 861         ShouldNotReachHere();
 862       }
 863     }
 864 #endif
 865   }
 866   int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
 867   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 868   return 5+offset_size;
 869 }
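// For example, a spill with stack_offset 0x20 needs only a 1-byte displacement, so
// the size estimate above is 5 + 1 = 6 bytes; offsets of 0x80 or more take a
// 4-byte displacement, giving 5 + 4 = 9.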
 870 
 871 static inline jfloat replicate4_imm(int con, int width) {
 872   // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
 873   assert(width == 1 || width == 2, "only byte or short types here");
 874   int bit_width = width * 8;
 875   jint val = con;
 876   val &= (1 << bit_width) - 1;  // mask off sign bits
 877   while(bit_width < 32) {
 878     val |= (val << bit_width);
 879     bit_width <<= 1;
 880   }
 881   jfloat fval = *((jfloat*) &val);  // coerce to float type
 882   return fval;
 883 }
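// For example, replicate4_imm(0x42, 1) returns the jfloat whose bit pattern is
// 0x42424242, which the Repl4B_imm instruct below loads from the constant table.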
 884 
 885 static inline jdouble replicate8_imm(int con, int width) {
 886   // Load a constant of "width" (in bytes) and replicate it to fill 64bit.


 950       // Check that stack depth is unchanged: find majik cookie on stack
 951       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 952       MacroAssembler _masm(&cbuf);
 953       Label L;
 954       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 955       __ jccb(Assembler::equal, L);
 956       // Die if stack mismatch
 957       __ int3();
 958       __ bind(L);
 959     }
 960   %}
 961 
 962 %}
 963 
 964 
 965 //----------OPERANDS-----------------------------------------------------------
 966 // Operand definitions must precede instruction definitions for correct parsing
 967 // in the ADLC because operands constitute user defined types which are used in
 968 // instruction definitions.
 969 
 970 // Vectors
 971 operand vecS() %{
 972   constraint(ALLOC_IN_RC(vectors_reg));
 973   match(VecS);
 974 
 975   format %{ %}
 976   interface(REG_INTER);
 977 %}
 978 
 979 operand vecD() %{
 980   constraint(ALLOC_IN_RC(vectord_reg));
 981   match(VecD);
 982 
 983   format %{ %}
 984   interface(REG_INTER);
 985 %}
 986 
 987 operand vecX() %{
 988   constraint(ALLOC_IN_RC(vectorx_reg));
 989   match(VecX);
 990 
 991   format %{ %}
 992   interface(REG_INTER);
 993 %}
 994 
 995 operand vecY() %{
 996   constraint(ALLOC_IN_RC(vectory_reg));
 997   match(VecY);
 998 
 999   format %{ %}
1000   interface(REG_INTER);
1001 %}
1002 
1003 
1004 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
1005 
1006 // ============================================================================
1007 
1008 instruct ShouldNotReachHere() %{
1009   match(Halt);
1010   format %{ "int3\t# ShouldNotReachHere" %}
1011   ins_encode %{
1012     __ int3();
1013   %}
1014   ins_pipe(pipe_slow);
1015 %}
1016 
1017 // ============================================================================
1018 
1019 instruct addF_reg(regF dst, regF src) %{
1020   predicate((UseSSE>=1) && (UseAVX == 0));
1021   match(Set dst (AddF dst src));
1022 
1023   format %{ "addss   $dst, $src" %}


1584   ins_pipe(pipe_slow);
1585 %}
1586 
1587 instruct absF_reg(regF dst) %{
1588   predicate((UseSSE>=1) && (UseAVX == 0));
1589   match(Set dst (AbsF dst));
1590   ins_cost(150);
1591   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
1592   ins_encode %{
1593     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
1594   %}
1595   ins_pipe(pipe_slow);
1596 %}
1597 
1598 instruct absF_reg_reg(regF dst, regF src) %{
1599   predicate(UseAVX > 0);
1600   match(Set dst (AbsF src));
1601   ins_cost(150);
1602   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
1603   ins_encode %{
1604     bool vector256 = false;
1605     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
1606               ExternalAddress(float_signmask()), vector256);
1607   %}
1608   ins_pipe(pipe_slow);
1609 %}
1610 
1611 instruct absD_reg(regD dst) %{
1612   predicate((UseSSE>=2) && (UseAVX == 0));
1613   match(Set dst (AbsD dst));
1614   ins_cost(150);
1615   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
1616             "# abs double by sign masking" %}
1617   ins_encode %{
1618     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
1619   %}
1620   ins_pipe(pipe_slow);
1621 %}
1622 
1623 instruct absD_reg_reg(regD dst, regD src) %{
1624   predicate(UseAVX > 0);
1625   match(Set dst (AbsD src));
1626   ins_cost(150);
1627   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
1628             "# abs double by sign masking" %}
1629   ins_encode %{
1630     bool vector256 = false;
1631     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
1632               ExternalAddress(double_signmask()), vector256);
1633   %}
1634   ins_pipe(pipe_slow);
1635 %}
1636 
1637 instruct negF_reg(regF dst) %{
1638   predicate((UseSSE>=1) && (UseAVX == 0));
1639   match(Set dst (NegF dst));
1640   ins_cost(150);
1641   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
1642   ins_encode %{
1643     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
1644   %}
1645   ins_pipe(pipe_slow);
1646 %}
1647 
1648 instruct negF_reg_reg(regF dst, regF src) %{
1649   predicate(UseAVX > 0);
1650   match(Set dst (NegF src));
1651   ins_cost(150);
1652   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
1653   ins_encode %{
1654     bool vector256 = false;
1655     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
1656               ExternalAddress(float_signflip()), vector256);
1657   %}
1658   ins_pipe(pipe_slow);
1659 %}
1660 
1661 instruct negD_reg(regD dst) %{
1662   predicate((UseSSE>=2) && (UseAVX == 0));
1663   match(Set dst (NegD dst));
1664   ins_cost(150);
1665   format %{ "xorpd   $dst, [0x8000000000000000]\t"
1666             "# neg double by sign flipping" %}
1667   ins_encode %{
1668     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
1669   %}
1670   ins_pipe(pipe_slow);
1671 %}
1672 
1673 instruct negD_reg_reg(regD dst, regD src) %{
1674   predicate(UseAVX > 0);
1675   match(Set dst (NegD src));
1676   ins_cost(150);
1677   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
1678             "# neg double by sign flipping" %}
1679   ins_encode %{
1680     bool vector256 = false;
1681     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
1682               ExternalAddress(double_signflip()), vector256);
1683   %}
1684   ins_pipe(pipe_slow);
1685 %}
1686 
1687 instruct sqrtF_reg(regF dst, regF src) %{
1688   predicate(UseSSE>=1);
1689   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
1690 
1691   format %{ "sqrtss  $dst, $src" %}
1692   ins_cost(150);
1693   ins_encode %{
1694     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
1695   %}
1696   ins_pipe(pipe_slow);
1697 %}
1698 
1699 instruct sqrtF_mem(regF dst, memory src) %{
1700   predicate(UseSSE>=1);
1701   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
1702 


1737 
1738   format %{ "sqrtsd  $dst, $src" %}
1739   ins_cost(150);
1740   ins_encode %{
1741     __ sqrtsd($dst$$XMMRegister, $src$$Address);
1742   %}
1743   ins_pipe(pipe_slow);
1744 %}
1745 
1746 instruct sqrtD_imm(regD dst, immD con) %{
1747   predicate(UseSSE>=2);
1748   match(Set dst (SqrtD con));
1749   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1750   ins_cost(150);
1751   ins_encode %{
1752     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
1753   %}
1754   ins_pipe(pipe_slow);
1755 %}
1756 
1757 
1758 // ====================VECTOR INSTRUCTIONS=====================================
1759 
1760 // Load vectors (4 bytes long)
1761 instruct loadV4(vecS dst, memory mem) %{
1762   predicate(n->as_LoadVector()->memory_size() == 4);
1763   match(Set dst (LoadVector mem));
1764   ins_cost(125);
1765   format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
1766   ins_encode %{
1767     __ movdl($dst$$XMMRegister, $mem$$Address);
1768   %}
1769   ins_pipe( pipe_slow );
1770 %}
1771 
1772 // Load vectors (8 bytes long)
1773 instruct loadV8(vecD dst, memory mem) %{
1774   predicate(n->as_LoadVector()->memory_size() == 8);
1775   match(Set dst (LoadVector mem));
1776   ins_cost(125);
1777   format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}


1788   ins_cost(125);
1789   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
1790   ins_encode %{
1791     __ movdqu($dst$$XMMRegister, $mem$$Address);
1792   %}
1793   ins_pipe( pipe_slow );
1794 %}
1795 
1796 // Load vectors (32 bytes long)
1797 instruct loadV32(vecY dst, memory mem) %{
1798   predicate(n->as_LoadVector()->memory_size() == 32);
1799   match(Set dst (LoadVector mem));
1800   ins_cost(125);
1801   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
1802   ins_encode %{
1803     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
1804   %}
1805   ins_pipe( pipe_slow );
1806 %}
1807 
1808 // Store vectors
1809 instruct storeV4(memory mem, vecS src) %{
1810   predicate(n->as_StoreVector()->memory_size() == 4);
1811   match(Set mem (StoreVector mem src));
1812   ins_cost(145);
1813   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
1814   ins_encode %{
1815     __ movdl($mem$$Address, $src$$XMMRegister);
1816   %}
1817   ins_pipe( pipe_slow );
1818 %}
1819 
1820 instruct storeV8(memory mem, vecD src) %{
1821   predicate(n->as_StoreVector()->memory_size() == 8);
1822   match(Set mem (StoreVector mem src));
1823   ins_cost(145);
1824   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
1825   ins_encode %{
1826     __ movq($mem$$Address, $src$$XMMRegister);
1827   %}


1833   match(Set mem (StoreVector mem src));
1834   ins_cost(145);
1835   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
1836   ins_encode %{
1837     __ movdqu($mem$$Address, $src$$XMMRegister);
1838   %}
1839   ins_pipe( pipe_slow );
1840 %}
1841 
1842 instruct storeV32(memory mem, vecY src) %{
1843   predicate(n->as_StoreVector()->memory_size() == 32);
1844   match(Set mem (StoreVector mem src));
1845   ins_cost(145);
1846   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
1847   ins_encode %{
1848     __ vmovdqu($mem$$Address, $src$$XMMRegister);
1849   %}
1850   ins_pipe( pipe_slow );
1851 %}
1852 
1853 // Replicate byte scalar to be vector
1854 instruct Repl4B(vecS dst, rRegI src) %{
1855   predicate(n->as_Vector()->length() == 4);
1856   match(Set dst (ReplicateB src));
1857   format %{ "movd    $dst,$src\n\t"
1858             "punpcklbw $dst,$dst\n\t"
1859             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
1860   ins_encode %{
1861     __ movdl($dst$$XMMRegister, $src$$Register);
1862     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1863     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1864   %}
1865   ins_pipe( pipe_slow );
1866 %}
1867 
1868 instruct Repl8B(vecD dst, rRegI src) %{
1869   predicate(n->as_Vector()->length() == 8);
1870   match(Set dst (ReplicateB src));
1871   format %{ "movd    $dst,$src\n\t"
1872             "punpcklbw $dst,$dst\n\t"


1896 %}
1897 
1898 instruct Repl32B(vecY dst, rRegI src) %{
1899   predicate(n->as_Vector()->length() == 32);
1900   match(Set dst (ReplicateB src));
1901   format %{ "movd    $dst,$src\n\t"
1902             "punpcklbw $dst,$dst\n\t"
1903             "pshuflw $dst,$dst,0x00\n\t"
1904             "punpcklqdq $dst,$dst\n\t"
1905             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
1906   ins_encode %{
1907     __ movdl($dst$$XMMRegister, $src$$Register);
1908     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1909     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1910     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1911     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1912   %}
1913   ins_pipe( pipe_slow );
1914 %}
1915 
1916 // Replicate byte scalar immediate to be vector by loading from const table.
1917 instruct Repl4B_imm(vecS dst, immI con) %{
1918   predicate(n->as_Vector()->length() == 4);
1919   match(Set dst (ReplicateB con));
1920   format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
1921   ins_encode %{
1922     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
1923   %}
1924   ins_pipe( pipe_slow );
1925 %}
1926 
1927 instruct Repl8B_imm(vecD dst, immI con) %{
1928   predicate(n->as_Vector()->length() == 8);
1929   match(Set dst (ReplicateB con));
1930   format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
1931   ins_encode %{
1932     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1933   %}
1934   ins_pipe( pipe_slow );
1935 %}


1943     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1944     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1945   %}
1946   ins_pipe( pipe_slow );
1947 %}
1948 
1949 instruct Repl32B_imm(vecY dst, immI con) %{
1950   predicate(n->as_Vector()->length() == 32);
1951   match(Set dst (ReplicateB con));
1952   format %{ "movq    $dst,[$constantaddress]\n\t"
1953             "punpcklqdq $dst,$dst\n\t"
 1954             "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
1955   ins_encode %{
1956     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1957     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1958     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1959   %}
1960   ins_pipe( pipe_slow );
1961 %}
1962 
1963 // Replicate byte scalar zero to be vector
1964 instruct Repl4B_zero(vecS dst, immI0 zero) %{
1965   predicate(n->as_Vector()->length() == 4);
1966   match(Set dst (ReplicateB zero));
1967   format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
1968   ins_encode %{
1969     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1970   %}
1971   ins_pipe( fpu_reg_reg );
1972 %}
1973 
1974 instruct Repl8B_zero(vecD dst, immI0 zero) %{
1975   predicate(n->as_Vector()->length() == 8);
1976   match(Set dst (ReplicateB zero));
1977   format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
1978   ins_encode %{
1979     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1980   %}
1981   ins_pipe( fpu_reg_reg );
1982 %}
1983 
1984 instruct Repl16B_zero(vecX dst, immI0 zero) %{
1985   predicate(n->as_Vector()->length() == 16);
1986   match(Set dst (ReplicateB zero));
1987   format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
1988   ins_encode %{
1989     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1990   %}
1991   ins_pipe( fpu_reg_reg );
1992 %}
1993 
1994 instruct Repl32B_zero(vecY dst, immI0 zero) %{
1995   predicate(n->as_Vector()->length() == 32);
1996   match(Set dst (ReplicateB zero));
1997   format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
1998   ins_encode %{
1999     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
2000     bool vector256 = true;
2001     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2002   %}
2003   ins_pipe( fpu_reg_reg );
2004 %}
2005 
2006 // Replicate char/short (2 byte) scalar to be vector
2007 instruct Repl2S(vecS dst, rRegI src) %{
2008   predicate(n->as_Vector()->length() == 2);
2009   match(Set dst (ReplicateS src));
2010   format %{ "movd    $dst,$src\n\t"
2011             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
2012   ins_encode %{
2013     __ movdl($dst$$XMMRegister, $src$$Register);
2014     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2015   %}
2016   ins_pipe( fpu_reg_reg );
2017 %}
2018 
2019 instruct Repl4S(vecD dst, rRegI src) %{
2020   predicate(n->as_Vector()->length() == 4);
2021   match(Set dst (ReplicateS src));


2041   %}
2042   ins_pipe( pipe_slow );
2043 %}
2044 
2045 instruct Repl16S(vecY dst, rRegI src) %{
2046   predicate(n->as_Vector()->length() == 16);
2047   match(Set dst (ReplicateS src));
2048   format %{ "movd    $dst,$src\n\t"
2049             "pshuflw $dst,$dst,0x00\n\t"
2050             "punpcklqdq $dst,$dst\n\t"
2051             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
2052   ins_encode %{
2053     __ movdl($dst$$XMMRegister, $src$$Register);
2054     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2055     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2056     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2057   %}
2058   ins_pipe( pipe_slow );
2059 %}
2060 
2061 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
2062 instruct Repl2S_imm(vecS dst, immI con) %{
2063   predicate(n->as_Vector()->length() == 2);
2064   match(Set dst (ReplicateS con));
2065   format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
2066   ins_encode %{
2067     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
2068   %}
2069   ins_pipe( fpu_reg_reg );
2070 %}
2071 
2072 instruct Repl4S_imm(vecD dst, immI con) %{
2073   predicate(n->as_Vector()->length() == 4);
2074   match(Set dst (ReplicateS con));
2075   format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
2076   ins_encode %{
2077     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2078   %}
2079   ins_pipe( fpu_reg_reg );
2080 %}


2088     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2089     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2090   %}
2091   ins_pipe( pipe_slow );
2092 %}
2093 
2094 instruct Repl16S_imm(vecY dst, immI con) %{
2095   predicate(n->as_Vector()->length() == 16);
2096   match(Set dst (ReplicateS con));
2097   format %{ "movq    $dst,[$constantaddress]\n\t"
2098             "punpcklqdq $dst,$dst\n\t"
2099             "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
2100   ins_encode %{
2101     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2102     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2103     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2104   %}
2105   ins_pipe( pipe_slow );
2106 %}
2107 
2108 // Replicate char/short (2 byte) scalar zero to be vector
2109 instruct Repl2S_zero(vecS dst, immI0 zero) %{
2110   predicate(n->as_Vector()->length() == 2);
2111   match(Set dst (ReplicateS zero));
2112   format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
2113   ins_encode %{
2114     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2115   %}
2116   ins_pipe( fpu_reg_reg );
2117 %}
2118 
2119 instruct Repl4S_zero(vecD dst, immI0 zero) %{
2120   predicate(n->as_Vector()->length() == 4);
2121   match(Set dst (ReplicateS zero));
2122   format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
2123   ins_encode %{
2124     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2125   %}
2126   ins_pipe( fpu_reg_reg );
2127 %}
2128 
2129 instruct Repl8S_zero(vecX dst, immI0 zero) %{
2130   predicate(n->as_Vector()->length() == 8);
2131   match(Set dst (ReplicateS zero));
2132   format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
2133   ins_encode %{
2134     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2135   %}
2136   ins_pipe( fpu_reg_reg );
2137 %}
2138 
2139 instruct Repl16S_zero(vecY dst, immI0 zero) %{
2140   predicate(n->as_Vector()->length() == 16);
2141   match(Set dst (ReplicateS zero));
2142   format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
2143   ins_encode %{
2144     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
2145     bool vector256 = true;
2146     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2147   %}
2148   ins_pipe( fpu_reg_reg );
2149 %}
2150 
2151 // Replicate integer (4 byte) scalar to be vector
2152 instruct Repl2I(vecD dst, rRegI src) %{
2153   predicate(n->as_Vector()->length() == 2);
2154   match(Set dst (ReplicateI src));
2155   format %{ "movd    $dst,$src\n\t"
2156             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
2157   ins_encode %{
2158     __ movdl($dst$$XMMRegister, $src$$Register);
2159     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2160   %}
2161   ins_pipe( fpu_reg_reg );
2162 %}
2163 
2164 instruct Repl4I(vecX dst, rRegI src) %{
2165   predicate(n->as_Vector()->length() == 4);
2166   match(Set dst (ReplicateI src));


2170     __ movdl($dst$$XMMRegister, $src$$Register);
2171     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2172   %}
2173   ins_pipe( pipe_slow );
2174 %}
2175 
2176 instruct Repl8I(vecY dst, rRegI src) %{
2177   predicate(n->as_Vector()->length() == 8);
2178   match(Set dst (ReplicateI src));
2179   format %{ "movd    $dst,$src\n\t"
2180             "pshufd  $dst,$dst,0x00\n\t"
2181             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2182   ins_encode %{
2183     __ movdl($dst$$XMMRegister, $src$$Register);
2184     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2185     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2186   %}
2187   ins_pipe( pipe_slow );
2188 %}
2189 
2190 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
2191 instruct Repl2I_imm(vecD dst, immI con) %{
2192   predicate(n->as_Vector()->length() == 2);
2193   match(Set dst (ReplicateI con));
2194   format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
2195   ins_encode %{
2196     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2197   %}
2198   ins_pipe( fpu_reg_reg );
2199 %}
2200 
2201 instruct Repl4I_imm(vecX dst, immI con) %{
2202   predicate(n->as_Vector()->length() == 4);
2203   match(Set dst (ReplicateI con));
2204   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
2205             "punpcklqdq $dst,$dst" %}
2206   ins_encode %{
2207     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2208     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2209   %}
2210   ins_pipe( pipe_slow );
2211 %}
2212 
2213 instruct Repl8I_imm(vecY dst, immI con) %{
2214   predicate(n->as_Vector()->length() == 8);
2215   match(Set dst (ReplicateI con));
2216   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
2217             "punpcklqdq $dst,$dst\n\t"
2218             "vinserti128h $dst,$dst,$dst" %}
2219   ins_encode %{
2220     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2221     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2222     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2223   %}
2224   ins_pipe( pipe_slow );
2225 %}
2226 
2227 // The integer can be loaded into an XMM register directly from memory.
2228 instruct Repl2I_mem(vecD dst, memory mem) %{
2229   predicate(n->as_Vector()->length() == 2);
2230   match(Set dst (ReplicateI (LoadI mem)));
2231   format %{ "movd    $dst,$mem\n\t"
2232             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
2233   ins_encode %{
















2234     __ movdl($dst$$XMMRegister, $mem$$Address);
2235     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2236   %}
2237   ins_pipe( fpu_reg_reg );
2238 %}
2239 
2240 instruct Repl4I_mem(vecX dst, memory mem) %{
2241   predicate(n->as_Vector()->length() == 4);
2242   match(Set dst (ReplicateI (LoadI mem)));
2243   format %{ "movd    $dst,$mem\n\t"
2244             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
2245   ins_encode %{
2246     __ movdl($dst$$XMMRegister, $mem$$Address);
2247     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2248   %}
2249   ins_pipe( pipe_slow );
2250 %}
2251 
2252 instruct Repl8I_mem(vecY dst, memory mem) %{
2253   predicate(n->as_Vector()->length() == 8);
2254   match(Set dst (ReplicateI (LoadI mem)));
2255   format %{ "movd    $dst,$mem\n\t"
2256             "pshufd  $dst,$dst,0x00\n\t"
2257             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2258   ins_encode %{
2259     __ movdl($dst$$XMMRegister, $mem$$Address);
2260     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2261     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2262   %}
2263   ins_pipe( pipe_slow );
2264 %}
2265 
















2266 // Replicate integer (4 byte) scalar zero to be vector
2267 instruct Repl2I_zero(vecD dst, immI0 zero) %{
2268   predicate(n->as_Vector()->length() == 2);
2269   match(Set dst (ReplicateI zero));
2270   format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
2271   ins_encode %{
2272     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2273   %}
2274   ins_pipe( fpu_reg_reg );
2275 %}
2276 
2277 instruct Repl4I_zero(vecX dst, immI0 zero) %{
2278   predicate(n->as_Vector()->length() == 4);
2279   match(Set dst (ReplicateI zero));
2280   format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
2281   ins_encode %{
2282     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2283   %}
2284   ins_pipe( fpu_reg_reg );
2285 %}
2286 
2287 instruct Repl8I_zero(vecY dst, immI0 zero) %{
2288   predicate(n->as_Vector()->length() == 8);
2289   match(Set dst (ReplicateI zero));
2290   format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
2291   ins_encode %{
2292     // Clear the full 256-bit register; the 256-bit form of vpxor needs AVX2 (plain AVX only has vxorps/vxorpd at 256 bits).
2293     bool vector256 = true;
2294     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);












2295   %}
2296   ins_pipe( fpu_reg_reg );
2297 %}
2298 
2299 // Replicate long (8 byte) scalar to be vector
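// On 64-bit (_LP64) the long moves into the XMM register in one movdq; on
// 32-bit it arrives as a lo/hi register pair, so the encoding below builds the
// 64-bit lane with two movdl's and a punpckldq before punpcklqdq duplicates it.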
2300 #ifdef _LP64
2301 instruct Repl2L(vecX dst, rRegL src) %{
2302   predicate(n->as_Vector()->length() == 2);
2303   match(Set dst (ReplicateL src));
2304   format %{ "movdq   $dst,$src\n\t"
2305             "punpcklqdq $dst,$dst\t! replicate2L" %}
2306   ins_encode %{
2307     __ movdq($dst$$XMMRegister, $src$$Register);
2308     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2309   %}
2310   ins_pipe( pipe_slow );
2311 %}
2312 
2313 instruct Repl4L(vecY dst, rRegL src) %{
2314   predicate(n->as_Vector()->length() == 4);
2315   match(Set dst (ReplicateL src));
2316   format %{ "movdq   $dst,$src\n\t"
2317             "punpcklqdq $dst,$dst\n\t"
2318             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2319   ins_encode %{
2320     __ movdq($dst$$XMMRegister, $src$$Register);
2321     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2322     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2323   %}
2324   ins_pipe( pipe_slow );
2325 %}
















2326 #else // _LP64
2327 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
2328   predicate(n->as_Vector()->length() == 2);
2329   match(Set dst (ReplicateL src));
2330   effect(TEMP dst, USE src, TEMP tmp);
2331   format %{ "movdl   $dst,$src.lo\n\t"
2332             "movdl   $tmp,$src.hi\n\t"
2333             "punpckldq $dst,$tmp\n\t"
2334             "punpcklqdq $dst,$dst\t! replicate2L"%}
2335   ins_encode %{
2336     __ movdl($dst$$XMMRegister, $src$$Register);
2337     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2338     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2339     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2340   %}
2341   ins_pipe( pipe_slow );
2342 %}
2343 
2344 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
2345   predicate(n->as_Vector()->length() == 4);
2346   match(Set dst (ReplicateL src));
2347   effect(TEMP dst, USE src, TEMP tmp);
2348   format %{ "movdl   $dst,$src.lo\n\t"
2349             "movdl   $tmp,$src.hi\n\t"
2350             "punpckldq $dst,$tmp\n\t"
2351             "punpcklqdq $dst,$dst\n\t"
2352             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2353   ins_encode %{
2354     __ movdl($dst$$XMMRegister, $src$$Register);
2355     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2356     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2357     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2358     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2359   %}
2360   ins_pipe( pipe_slow );
2361 %}




















2362 #endif // _LP64
2363 
2364 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
2365 instruct Repl2L_imm(vecX dst, immL con) %{
2366   predicate(n->as_Vector()->length() == 2);
2367   match(Set dst (ReplicateL con));
2368   format %{ "movq    $dst,[$constantaddress]\n\t"
2369             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
2370   ins_encode %{
2371     __ movq($dst$$XMMRegister, $constantaddress($con));
2372     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2373   %}
2374   ins_pipe( pipe_slow );
2375 %}
2376 
2377 instruct Repl4L_imm(vecY dst, immL con) %{
2378   predicate(n->as_Vector()->length() == 4);
2379   match(Set dst (ReplicateL con));
2380   format %{ "movq    $dst,[$constantaddress]\n\t"
2381             "punpcklqdq $dst,$dst\n\t"
2382             "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
2383   ins_encode %{
2384     __ movq($dst$$XMMRegister, $constantaddress($con));
2385     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2386     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2387   %}
2388   ins_pipe( pipe_slow );
2389 %}
2390 
















2391 // The long can be loaded into an XMM register directly from memory.
2392 instruct Repl2L_mem(vecX dst, memory mem) %{
2393   predicate(n->as_Vector()->length() == 2);
2394   match(Set dst (ReplicateL (LoadL mem)));
2395   format %{ "movq    $dst,$mem\n\t"
2396             "punpcklqdq $dst,$dst\t! replicate2L" %}
2397   ins_encode %{
2398     __ movq($dst$$XMMRegister, $mem$$Address);
2399     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2400   %}
2401   ins_pipe( pipe_slow );
2402 %}
2403 
2404 instruct Repl4L_mem(vecY dst, memory mem) %{
2405   predicate(n->as_Vector()->length() == 4);
2406   match(Set dst (ReplicateL (LoadL mem)));
2407   format %{ "movq    $dst,$mem\n\t"
2408             "punpcklqdq $dst,$dst\n\t"
2409             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2410   ins_encode %{
2411     __ movq($dst$$XMMRegister, $mem$$Address);
2412     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2413     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2414   %}
2415   ins_pipe( pipe_slow );
2416 %}
2417 
















2418 // Replicate long (8 byte) scalar zero to be vector
2419 instruct Repl2L_zero(vecX dst, immL0 zero) %{
2420   predicate(n->as_Vector()->length() == 2);
2421   match(Set dst (ReplicateL zero));
2422   format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
2423   ins_encode %{
2424     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2425   %}
2426   ins_pipe( fpu_reg_reg );
2427 %}
2428 
2429 instruct Repl4L_zero(vecY dst, immL0 zero) %{
2430   predicate(n->as_Vector()->length() == 4);
2431   match(Set dst (ReplicateL zero));
2432   format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
2433   ins_encode %{
2434     // Clear the full 256-bit register; the 256-bit form of vpxor needs AVX2 (plain AVX only has vxorps/vxorpd at 256 bits).
2435     bool vector256 = true;
2436     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);












2437   %}
2438   ins_pipe( fpu_reg_reg );
2439 %}
2440 
2441 // Replicate float (4 byte) scalar to be vector
2442 instruct Repl2F(vecD dst, regF src) %{
2443   predicate(n->as_Vector()->length() == 2);
2444   match(Set dst (ReplicateF src));
2445   format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
2446   ins_encode %{
2447     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2448   %}
2449   ins_pipe( fpu_reg_reg );
2450 %}
2451 
2452 instruct Repl4F(vecX dst, regF src) %{
2453   predicate(n->as_Vector()->length() == 4);
2454   match(Set dst (ReplicateF src));
2455   format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
2456   ins_encode %{
2457     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2458   %}
2459   ins_pipe( pipe_slow );
2460 %}
2461 
2462 instruct Repl8F(vecY dst, regF src) %{
2463   predicate(n->as_Vector()->length() == 8);
2464   match(Set dst (ReplicateF src));
2465   format %{ "pshufd  $dst,$src,0x00\n\t"
2466             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
2467   ins_encode %{
2468     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2469     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2470   %}
2471   ins_pipe( pipe_slow );
2472 %}
2473 














2474 // Replicate float (4 byte) scalar zero to be vector
2475 instruct Repl2F_zero(vecD dst, immF0 zero) %{
2476   predicate(n->as_Vector()->length() == 2);
2477   match(Set dst (ReplicateF zero));
2478   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
2479   ins_encode %{
2480     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2481   %}
2482   ins_pipe( fpu_reg_reg );
2483 %}
2484 
2485 instruct Repl4F_zero(vecX dst, immF0 zero) %{
2486   predicate(n->as_Vector()->length() == 4);
2487   match(Set dst (ReplicateF zero));
2488   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
2489   ins_encode %{
2490     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2491   %}
2492   ins_pipe( fpu_reg_reg );
2493 %}
2494 
2495 instruct Repl8F_zero(vecY dst, immF0 zero) %{
2496   predicate(n->as_Vector()->length() == 8);
2497   match(Set dst (ReplicateF zero));
2498   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
2499   ins_encode %{
2500     bool vector256 = true;
2501     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);











2502   %}
2503   ins_pipe( fpu_reg_reg );
2504 %}
2505 
2506 // Replicate double (8 bytes) scalar to be vector
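// pshufd selector 0x44 reads dwords {0,1,0,1}, i.e. it duplicates the low
// 64-bit (double) lane across the 128-bit register.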
2507 instruct Repl2D(vecX dst, regD src) %{
2508   predicate(n->as_Vector()->length() == 2);
2509   match(Set dst (ReplicateD src));
2510   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
2511   ins_encode %{
2512     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2513   %}
2514   ins_pipe( pipe_slow );
2515 %}
2516 
2517 instruct Repl4D(vecY dst, regD src) %{
2518   predicate(n->as_Vector()->length() == 4);
2519   match(Set dst (ReplicateD src));
2520   format %{ "pshufd  $dst,$src,0x44\n\t"
2521             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
2522   ins_encode %{
2523     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2524     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2525   %}
2526   ins_pipe( pipe_slow );
2527 %}
2528 














2529 // Replicate double (8 byte) scalar zero to be vector
2530 instruct Repl2D_zero(vecX dst, immD0 zero) %{
2531   predicate(n->as_Vector()->length() == 2);
2532   match(Set dst (ReplicateD zero));
2533   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
2534   ins_encode %{
2535     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2536   %}
2537   ins_pipe( fpu_reg_reg );
2538 %}
2539 
2540 instruct Repl4D_zero(vecY dst, immD0 zero) %{
2541   predicate(n->as_Vector()->length() == 4);
2542   match(Set dst (ReplicateD zero));
2543   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
2544   ins_encode %{
2545     bool vector256 = true;
2546     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);











2547   %}
2548   ins_pipe( fpu_reg_reg );
2549 %}
2550 
2551 // ====================REDUCTION ARITHMETIC=======================================
2552 
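// Conceptually each reduction folds a vector into a scalar:
//   dst = src1 op src2[0] op src2[1] op ... op src2[n-1]
// Integer add forms pair lanes with (v)phaddd, integer mul forms rotate lanes
// into element 0 with pshufd; float/double forms accumulate with scalar
// addss/addsd or mulss/mulsd, again using pshufd to rotate lanes down
// (selectors 0x01/0x02/0x03 for floats, 0xE for the high double).  The 256-bit
// forms first pull the upper 128-bit half down with vextractf128h before
// combining with the scalar src1.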
2553 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
2554   predicate(UseSSE > 2 && UseAVX == 0);
2555   match(Set dst (AddReductionVI src1 src2));
2556   effect(TEMP tmp2, TEMP tmp);
2557   format %{ "movdqu  $tmp2,$src2\n\t"
2558             "phaddd  $tmp2,$tmp2\n\t"
2559             "movd    $tmp,$src1\n\t"
2560             "paddd   $tmp,$tmp2\n\t"
2561             "movd    $dst,$tmp\t! add reduction2I" %}
2562   ins_encode %{
2563     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
2564     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
2565     __ movdl($tmp$$XMMRegister, $src1$$Register);
2566     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
2567     __ movdl($dst$$Register, $tmp$$XMMRegister);
2568   %}
2569   ins_pipe( pipe_slow );
2570 %}
2571 
2572 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
2573   predicate(UseAVX > 0);
2574   match(Set dst (AddReductionVI src1 src2));
2575   effect(TEMP tmp, TEMP tmp2);
2576   format %{ "vphaddd $tmp,$src2,$src2\n\t"
2577             "movd    $tmp2,$src1\n\t"
2578             "vpaddd  $tmp2,$tmp2,$tmp\n\t"
2579             "movd    $dst,$tmp2\t! add reduction2I" %}
2580   ins_encode %{
2581     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);





















2582     __ movdl($tmp2$$XMMRegister, $src1$$Register);
2583     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
2584     __ movdl($dst$$Register, $tmp2$$XMMRegister);
2585   %}
2586   ins_pipe( pipe_slow );
2587 %}
2588 
2589 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
2590   predicate(UseSSE > 2 && UseAVX == 0);
2591   match(Set dst (AddReductionVI src1 src2));
2592   effect(TEMP tmp2, TEMP tmp);
2593   format %{ "movdqu  $tmp2,$src2\n\t"
2594             "phaddd  $tmp2,$tmp2\n\t"
2595             "phaddd  $tmp2,$tmp2\n\t"
2596             "movd    $tmp,$src1\n\t"
2597             "paddd   $tmp,$tmp2\n\t"
2598             "movd    $dst,$tmp\t! add reduction4I" %}
2599   ins_encode %{
2600     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
2601     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
2602     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
2603     __ movdl($tmp$$XMMRegister, $src1$$Register);
2604     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
2605     __ movdl($dst$$Register, $tmp$$XMMRegister);
2606   %}
2607   ins_pipe( pipe_slow );
2608 %}
2609 
2610 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
2611   predicate(UseAVX > 0);
2612   match(Set dst (AddReductionVI src1 src2));
2613   effect(TEMP tmp, TEMP tmp2);
2614   format %{ "vphaddd $tmp,$src2,$src2\n\t"
2615             "vphaddd $tmp,$tmp,$tmp2\n\t"
2616             "movd    $tmp2,$src1\n\t"
2617             "vpaddd  $tmp2,$tmp2,$tmp\n\t"
2618             "movd    $dst,$tmp2\t! add reduction4I" %}
2619   ins_encode %{
2620     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
2621     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);

























2622     __ movdl($tmp2$$XMMRegister, $src1$$Register);
2623     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
2624     __ movdl($dst$$Register, $tmp2$$XMMRegister);
2625   %}
2626   ins_pipe( pipe_slow );
2627 %}
2628 
2629 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
2630   predicate(UseAVX > 0);
2631   match(Set dst (AddReductionVI src1 src2));
2632   effect(TEMP tmp, TEMP tmp2);
2633   format %{ "vphaddd $tmp,$src2,$src2\n\t"
2634             "vphaddd $tmp,$tmp,$tmp2\n\t"
2635             "vextractf128  $tmp2,$tmp\n\t"
2636             "vpaddd  $tmp,$tmp,$tmp2\n\t"
2637             "movd    $tmp2,$src1\n\t"
2638             "vpaddd  $tmp2,$tmp2,$tmp\n\t"
2639             "movd    $dst,$tmp2\t! add reduction8I" %}
2640   ins_encode %{
2641     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true);
2642     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true);
2643     __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
2644     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);

2645     __ movdl($tmp2$$XMMRegister, $src1$$Register);
2646     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
2647     __ movdl($dst$$Register, $tmp2$$XMMRegister);
2648   %}
2649   ins_pipe( pipe_slow );
2650 %}
2651 


































































































































2652 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
2653   predicate(UseSSE >= 1 && UseAVX == 0);
2654   match(Set dst (AddReductionVF src1 src2));
2655   effect(TEMP tmp, TEMP tmp2);
2656   format %{ "movdqu  $tmp,$src1\n\t"
2657             "addss   $tmp,$src2\n\t"
2658             "pshufd  $tmp2,$src2,0x01\n\t"
2659             "addss   $tmp,$tmp2\n\t"
2660             "movdqu  $dst,$tmp\t! add reduction2F" %}
2661   ins_encode %{
2662     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
2663     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
2664     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
2665     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
2666     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
2667   %}
2668   ins_pipe( pipe_slow );
2669 %}
2670 
2671 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{


2755   ins_encode %{
2756     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2757     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
2758     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2759     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
2760     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2761     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
2762     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2763     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
2764     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
2765     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
2766     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2767     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
2768     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2769     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
2770     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2771   %}
2772   ins_pipe( pipe_slow );
2773 %}
2774 







































































2775 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
2776   predicate(UseSSE >= 1 && UseAVX == 0);
2777   match(Set dst (AddReductionVD src1 src2));
2778   effect(TEMP tmp, TEMP dst);
2779   format %{ "movdqu  $tmp,$src1\n\t"
2780             "addsd   $tmp,$src2\n\t"
2781             "pshufd  $dst,$src2,0xE\n\t"
2782             "addsd   $dst,$tmp\t! add reduction2D" %}
2783   ins_encode %{
2784     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
2785     __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
2786     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
2787     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
2788   %}
2789   ins_pipe( pipe_slow );
2790 %}
2791 
2792 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
2793   predicate(UseAVX > 0);
2794   match(Set dst (AddReductionVD src1 src2));


2802     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2803   %}
2804   ins_pipe( pipe_slow );
2805 %}
2806 
2807 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
2808   predicate(UseAVX > 0);
2809   match(Set dst (AddReductionVD src1 src2));
2810   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
2811   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
2812             "pshufd  $tmp,$src2,0xE\n\t"
2813             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
2814             "vextractf128  $tmp3,$src2\n\t"
2815             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
2816             "pshufd  $tmp,$tmp3,0xE\n\t"
2817             "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
2818   ins_encode %{
2819     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2820     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
2821     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2822     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);







































2823     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
2824     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
2825     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2826   %}
2827   ins_pipe( pipe_slow );
2828 %}
2829 
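// pmulld (packed 32-bit multiply) is an SSE4.1 instruction, hence the
// UseSSE > 3 predicate on the non-AVX mul reductions below.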
2830 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
2831   predicate(UseSSE > 3 && UseAVX == 0);
2832   match(Set dst (MulReductionVI src1 src2));
2833   effect(TEMP tmp, TEMP tmp2);
2834   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
2835             "pmulld  $tmp2,$src2\n\t"
2836             "movd    $tmp,$src1\n\t"
2837             "pmulld  $tmp2,$tmp\n\t"
2838             "movd    $dst,$tmp2\t! mul reduction2I" %}
2839   ins_encode %{
2840     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
2841     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
2842     __ movdl($tmp$$XMMRegister, $src1$$Register);
2843     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
2844     __ movdl($dst$$Register, $tmp2$$XMMRegister);
2845   %}
2846   ins_pipe( pipe_slow );
2847 %}
2848 
2849 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
2850   predicate(UseAVX > 0);
2851   match(Set dst (MulReductionVI src1 src2));
2852   effect(TEMP tmp, TEMP tmp2);
2853   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
2854             "vpmulld $tmp,$src2,$tmp2\n\t"
2855             "movd    $tmp2,$src1\n\t"
2856             "vpmulld $tmp2,$tmp,$tmp2\n\t"
2857             "movd    $dst,$tmp2\t! mul reduction2I" %}
2858   ins_encode %{

2859     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
2860     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
2861     __ movdl($tmp2$$XMMRegister, $src1$$Register);
2862     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
2863     __ movdl($dst$$Register, $tmp2$$XMMRegister);
2864   %}
2865   ins_pipe( pipe_slow );
2866 %}
2867 
2868 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
2869   predicate(UseSSE > 3 && UseAVX == 0);
2870   match(Set dst (MulReductionVI src1 src2));
2871   effect(TEMP tmp, TEMP tmp2);
2872   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
2873             "pmulld  $tmp2,$src2\n\t"
2874             "pshufd  $tmp,$tmp2,0x1\n\t"
2875             "pmulld  $tmp2,$tmp\n\t"
2876             "movd    $tmp,$src1\n\t"
2877             "pmulld  $tmp2,$tmp\n\t"
2878             "movd    $dst,$tmp2\t! mul reduction4I" %}
2879   ins_encode %{
2880     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
2881     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
2882     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
2883     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
2884     __ movdl($tmp$$XMMRegister, $src1$$Register);
2885     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
2886     __ movdl($dst$$Register, $tmp2$$XMMRegister);
2887   %}
2888   ins_pipe( pipe_slow );
2889 %}
2890 
2891 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
2892   predicate(UseAVX > 0);
2893   match(Set dst (MulReductionVI src1 src2));
2894   effect(TEMP tmp, TEMP tmp2);
2895   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
2896             "vpmulld $tmp,$src2,$tmp2\n\t"
2897             "pshufd  $tmp2,$tmp,0x1\n\t"
2898             "vpmulld $tmp,$tmp,$tmp2\n\t"
2899             "movd    $tmp2,$src1\n\t"
2900             "vpmulld $tmp2,$tmp,$tmp2\n\t"
2901             "movd    $dst,$tmp2\t! mul reduction4I" %}
2902   ins_encode %{

2903     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
2904     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
2905     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
2906     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
2907     __ movdl($tmp2$$XMMRegister, $src1$$Register);
2908     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
2909     __ movdl($dst$$Register, $tmp2$$XMMRegister);
2910   %}
2911   ins_pipe( pipe_slow );
2912 %}
2913 
2914 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
2915   predicate(UseAVX > 0);
2916   match(Set dst (MulReductionVI src1 src2));
2917   effect(TEMP tmp, TEMP tmp2);
2918   format %{ "vextractf128  $tmp,$src2\n\t"
2919             "vpmulld $tmp,$tmp,$src2\n\t"
2920             "pshufd  $tmp2,$tmp,0xE\n\t"
2921             "vpmulld $tmp,$tmp,$tmp2\n\t"
2922             "pshufd  $tmp2,$tmp,0x1\n\t"
2923             "vpmulld $tmp,$tmp,$tmp2\n\t"
2924             "movd    $tmp2,$src1\n\t"
2925             "vpmulld $tmp2,$tmp,$tmp2\n\t"
2926             "movd    $dst,$tmp2\t! mul reduction8I" %}
2927   ins_encode %{
2928     __ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister);
2929     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false);
































2930     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
2931     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
2932     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
2933     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
2934     __ movdl($tmp2$$XMMRegister, $src1$$Register);
2935     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
2936     __ movdl($dst$$Register, $tmp2$$XMMRegister);
2937   %}
2938   ins_pipe( pipe_slow );
2939 %}
2940 
2941 instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{







































































2942   predicate(UseSSE >= 1 && UseAVX == 0);
2943   match(Set dst (MulReductionVF src1 src2));
2944   effect(TEMP tmp, TEMP tmp2);
2945   format %{ "movdqu  $tmp,$src1\n\t"
2946             "mulss   $tmp,$src2\n\t"
2947             "pshufd  $tmp2,$src2,0x01\n\t"
2948             "mulss   $tmp,$tmp2\n\t"
2949             "movdqu  $dst,$tmp\t! mul reduction2F" %}
2950   ins_encode %{
2951     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
2952     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
2953     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
2954     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
2955     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
2956   %}
2957   ins_pipe( pipe_slow );
2958 %}
2959 
2960 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
2961   predicate(UseAVX > 0);
2962   match(Set dst (MulReductionVF src1 src2));
2963   effect(TEMP tmp, TEMP tmp2);
2964   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
2965             "pshufd  $tmp,$src2,0x01\n\t"
2966             "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
2967   ins_encode %{
2968     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2969     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
2970     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2971   %}
2972   ins_pipe( pipe_slow );
2973 %}
2974 
2975 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
2976   predicate(UseSSE >= 1 && UseAVX == 0);
2977   match(Set dst (MulReductionVF src1 src2));
2978   effect(TEMP tmp, TEMP tmp2);
2979   format %{ "movdqu  $tmp,$src1\n\t"
2980             "mulss   $tmp,$src2\n\t"
2981             "pshufd  $tmp2,$src2,0x01\n\t"
2982             "mulss   $tmp,$tmp2\n\t"
2983             "pshufd  $tmp2,$src2,0x02\n\t"
2984             "mulss   $tmp,$tmp2\n\t"
2985             "pshufd  $tmp2,$src2,0x03\n\t"
2986             "mulss   $tmp,$tmp2\n\t"
2987             "movdqu  $dst,$tmp\t! mul reduction4F" %}
2988   ins_encode %{
2989     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
2990     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
2991     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
2992     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
2993     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
2994     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
2995     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
2996     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
2997     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
2998   %}
2999   ins_pipe( pipe_slow );
3000 %}
3001 
3002 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
3003   predicate(UseAVX > 0);
3004   match(Set dst (MulReductionVF src1 src2));
3005   effect(TEMP tmp, TEMP tmp2);
3006   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
3007             "pshufd  $tmp,$src2,0x01\n\t"
3008             "vmulss  $tmp2,$tmp2,$tmp\n\t"
3009             "pshufd  $tmp,$src2,0x02\n\t"
3010             "vmulss  $tmp2,$tmp2,$tmp\n\t"
3011             "pshufd  $tmp,$src2,0x03\n\t"
3012             "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
3013   ins_encode %{
3014     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3015     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
3016     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3017     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
3018     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3019     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
3020     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3021   %}
3022   ins_pipe( pipe_slow );
3023 %}
3024 
3025 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
3026   predicate(UseAVX > 0);
3027   match(Set dst (MulReductionVF src1 src2));
3028   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
3029   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
3030             "pshufd  $tmp,$src2,0x01\n\t"
3031             "vmulss  $tmp2,$tmp2,$tmp\n\t"
3032             "pshufd  $tmp,$src2,0x02\n\t"


3044   ins_encode %{
3045     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3046     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
3047     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3048     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
3049     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3050     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
3051     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3052     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
3053     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
3054     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
3055     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3056     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
3057     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3058     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
3059     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3060   %}
3061   ins_pipe( pipe_slow );
3062 %}
3063 







































































3064 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
3065   predicate(UseSSE >= 1 && UseAVX == 0);
3066   match(Set dst (MulReductionVD src1 src2));
3067   effect(TEMP tmp, TEMP dst);
3068   format %{ "movdqu  $tmp,$src1\n\t"
3069             "mulsd   $tmp,$src2\n\t"
3070             "pshufd  $dst,$src2,0xE\n\t"
3071             "mulsd   $dst,$tmp\t! mul reduction2D" %}
3072   ins_encode %{
3073     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
3074     __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
3075     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
3076     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
3077   %}
3078   ins_pipe( pipe_slow );
3079 %}
3080 
3081 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
3082   predicate(UseAVX > 0);
3083   match(Set dst (MulReductionVD src1 src2));
3084   effect(TEMP tmp, TEMP tmp2);
3085   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
3086             "pshufd  $tmp,$src2,0xE\n\t"
3087             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
3088   ins_encode %{
3089     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3090     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
3091     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);


3099   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
3100   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
3101             "pshufd  $tmp,$src2,0xE\n\t"
3102             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
3103             "vextractf128  $tmp3,$src2\n\t"
3104             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
3105             "pshufd  $tmp,$tmp3,0xE\n\t"
3106             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
3107   ins_encode %{
3108     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3109     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
3110     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3111     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
3112     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
3113     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
3114     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3115   %}
3116   ins_pipe( pipe_slow );
3117 %}
3118 







































3119 // ====================VECTOR ARITHMETIC=======================================
3120 
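// Every operation below follows the same scheme: a two-operand SSE form that
// matches (Set dst (OpV dst src)), a three-operand AVX register form, and, for
// the larger sizes, an AVX form with a memory operand.  The vector256 flag
// passed to the assembler selects the 128-bit vs 256-bit encoding; 256-bit
// integer forms require UseAVX > 1 (AVX2), while 256-bit float/double forms
// only need UseAVX > 0.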
3121 // --------------------------------- ADD --------------------------------------
3122 
3123 // Bytes vector add
3124 instruct vadd4B(vecS dst, vecS src) %{
3125   predicate(n->as_Vector()->length() == 4);
3126   match(Set dst (AddVB dst src));
3127   format %{ "paddb   $dst,$src\t! add packed4B" %}
3128   ins_encode %{
3129     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
3130   %}
3131   ins_pipe( pipe_slow );
3132 %}
3133 
3134 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
3135   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3136   match(Set dst (AddVB src1 src2));
3137   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
3138   ins_encode %{
3139     bool vector256 = false;
3140     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3141   %}
3142   ins_pipe( pipe_slow );
3143 %}
3144 
3145 instruct vadd8B(vecD dst, vecD src) %{
3146   predicate(n->as_Vector()->length() == 8);
3147   match(Set dst (AddVB dst src));
3148   format %{ "paddb   $dst,$src\t! add packed8B" %}
3149   ins_encode %{
3150     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
3151   %}
3152   ins_pipe( pipe_slow );
3153 %}
3154 
3155 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
3156   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3157   match(Set dst (AddVB src1 src2));
3158   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
3159   ins_encode %{
3160     bool vector256 = false;
3161     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3162   %}
3163   ins_pipe( pipe_slow );
3164 %}
3165 
3166 instruct vadd16B(vecX dst, vecX src) %{
3167   predicate(n->as_Vector()->length() == 16);
3168   match(Set dst (AddVB dst src));
3169   format %{ "paddb   $dst,$src\t! add packed16B" %}
3170   ins_encode %{
3171     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
3172   %}
3173   ins_pipe( pipe_slow );
3174 %}
3175 
3176 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
3177   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3178   match(Set dst (AddVB src1 src2));
3179   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
3180   ins_encode %{
3181     bool vector256 = false;
3182     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3183   %}
3184   ins_pipe( pipe_slow );
3185 %}
3186 
3187 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
3188   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3189   match(Set dst (AddVB src (LoadVector mem)));
3190   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
3191   ins_encode %{
3192     bool vector256 = false;
3193     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3194   %}
3195   ins_pipe( pipe_slow );
3196 %}
3197 
3198 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
3199   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3200   match(Set dst (AddVB src1 src2));
3201   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
3202   ins_encode %{
3203     bool vector256 = true;
3204     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3205   %}
3206   ins_pipe( pipe_slow );
3207 %}
3208 
3209 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
3210   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3211   match(Set dst (AddVB src (LoadVector mem)));
3212   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
3213   ins_encode %{
3214     bool vector256 = true;
3215     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















3216   %}
3217   ins_pipe( pipe_slow );
3218 %}
3219 
3220 // Shorts/Chars vector add
3221 instruct vadd2S(vecS dst, vecS src) %{
3222   predicate(n->as_Vector()->length() == 2);
3223   match(Set dst (AddVS dst src));
3224   format %{ "paddw   $dst,$src\t! add packed2S" %}
3225   ins_encode %{
3226     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
3227   %}
3228   ins_pipe( pipe_slow );
3229 %}
3230 
3231 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
3232   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3233   match(Set dst (AddVS src1 src2));
3234   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
3235   ins_encode %{
3236     bool vector256 = false;
3237     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3238   %}
3239   ins_pipe( pipe_slow );
3240 %}
3241 
3242 instruct vadd4S(vecD dst, vecD src) %{
3243   predicate(n->as_Vector()->length() == 4);
3244   match(Set dst (AddVS dst src));
3245   format %{ "paddw   $dst,$src\t! add packed4S" %}
3246   ins_encode %{
3247     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
3248   %}
3249   ins_pipe( pipe_slow );
3250 %}
3251 
3252 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
3253   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3254   match(Set dst (AddVS src1 src2));
3255   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
3256   ins_encode %{
3257     bool vector256 = false;
3258     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3259   %}
3260   ins_pipe( pipe_slow );
3261 %}
3262 
3263 instruct vadd8S(vecX dst, vecX src) %{
3264   predicate(n->as_Vector()->length() == 8);
3265   match(Set dst (AddVS dst src));
3266   format %{ "paddw   $dst,$src\t! add packed8S" %}
3267   ins_encode %{
3268     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
3269   %}
3270   ins_pipe( pipe_slow );
3271 %}
3272 
3273 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
3274   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3275   match(Set dst (AddVS src1 src2));
3276   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
3277   ins_encode %{
3278     bool vector256 = false;
3279     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3280   %}
3281   ins_pipe( pipe_slow );
3282 %}
3283 
3284 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
3285   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3286   match(Set dst (AddVS src (LoadVector mem)));
3287   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
3288   ins_encode %{
3289     bool vector256 = false;
3290     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3291   %}
3292   ins_pipe( pipe_slow );
3293 %}
3294 
3295 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
3296   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3297   match(Set dst (AddVS src1 src2));
3298   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
3299   ins_encode %{
3300     bool vector256 = true;
3301     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3302   %}
3303   ins_pipe( pipe_slow );
3304 %}
3305 
3306 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
3307   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3308   match(Set dst (AddVS src (LoadVector mem)));
3309   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
3310   ins_encode %{
3311     bool vector256 = true;
3312     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















3313   %}
3314   ins_pipe( pipe_slow );
3315 %}
3316 
3317 // Integers vector add
3318 instruct vadd2I(vecD dst, vecD src) %{
3319   predicate(n->as_Vector()->length() == 2);
3320   match(Set dst (AddVI dst src));
3321   format %{ "paddd   $dst,$src\t! add packed2I" %}
3322   ins_encode %{
3323     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
3324   %}
3325   ins_pipe( pipe_slow );
3326 %}
3327 
3328 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
3329   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3330   match(Set dst (AddVI src1 src2));
3331   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
3332   ins_encode %{
3333     bool vector256 = false;
3334     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3335   %}
3336   ins_pipe( pipe_slow );
3337 %}
3338 
3339 instruct vadd4I(vecX dst, vecX src) %{
3340   predicate(n->as_Vector()->length() == 4);
3341   match(Set dst (AddVI dst src));
3342   format %{ "paddd   $dst,$src\t! add packed4I" %}
3343   ins_encode %{
3344     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
3345   %}
3346   ins_pipe( pipe_slow );
3347 %}
3348 
3349 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
3350   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3351   match(Set dst (AddVI src1 src2));
3352   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
3353   ins_encode %{
3354     bool vector256 = false;
3355     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3356   %}
3357   ins_pipe( pipe_slow );
3358 %}
3359 
3360 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
3361   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3362   match(Set dst (AddVI src (LoadVector mem)));
3363   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
3364   ins_encode %{
3365     bool vector256 = false;
3366     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3367   %}
3368   ins_pipe( pipe_slow );
3369 %}
3370 
3371 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
3372   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3373   match(Set dst (AddVI src1 src2));
3374   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
3375   ins_encode %{
3376     bool vector256 = true;
3377     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3378   %}
3379   ins_pipe( pipe_slow );
3380 %}
3381 
3382 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
3383   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3384   match(Set dst (AddVI src (LoadVector mem)));
3385   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
3386   ins_encode %{
3387     bool vector256 = true;
3388     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















3389   %}
3390   ins_pipe( pipe_slow );
3391 %}
3392 
3393 // Longs vector add
3394 instruct vadd2L(vecX dst, vecX src) %{
3395   predicate(n->as_Vector()->length() == 2);
3396   match(Set dst (AddVL dst src));
3397   format %{ "paddq   $dst,$src\t! add packed2L" %}
3398   ins_encode %{
3399     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
3400   %}
3401   ins_pipe( pipe_slow );
3402 %}
3403 
3404 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
3405   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3406   match(Set dst (AddVL src1 src2));
3407   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
3408   ins_encode %{
3409     bool vector256 = false;
3410     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3411   %}
3412   ins_pipe( pipe_slow );
3413 %}
3414 
3415 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
3416   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3417   match(Set dst (AddVL src (LoadVector mem)));
3418   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
3419   ins_encode %{
3420     bool vector256 = false;
3421     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3422   %}
3423   ins_pipe( pipe_slow );
3424 %}
3425 
3426 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
3427   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3428   match(Set dst (AddVL src1 src2));
3429   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
3430   ins_encode %{
3431     bool vector256 = true;
3432     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3433   %}
3434   ins_pipe( pipe_slow );
3435 %}
3436 
3437 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
3438   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3439   match(Set dst (AddVL src (LoadVector mem)));
3440   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
3441   ins_encode %{
3442     bool vector256 = true;
3443     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















3444   %}
3445   ins_pipe( pipe_slow );
3446 %}
3447 
3448 // Floats vector add
3449 instruct vadd2F(vecD dst, vecD src) %{
3450   predicate(n->as_Vector()->length() == 2);
3451   match(Set dst (AddVF dst src));
3452   format %{ "addps   $dst,$src\t! add packed2F" %}
3453   ins_encode %{
3454     __ addps($dst$$XMMRegister, $src$$XMMRegister);
3455   %}
3456   ins_pipe( pipe_slow );
3457 %}
3458 
3459 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
3460   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3461   match(Set dst (AddVF src1 src2));
3462   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
3463   ins_encode %{
3464     bool vector256 = false;
3465     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3466   %}
3467   ins_pipe( pipe_slow );
3468 %}
3469 
3470 instruct vadd4F(vecX dst, vecX src) %{
3471   predicate(n->as_Vector()->length() == 4);
3472   match(Set dst (AddVF dst src));
3473   format %{ "addps   $dst,$src\t! add packed4F" %}
3474   ins_encode %{
3475     __ addps($dst$$XMMRegister, $src$$XMMRegister);
3476   %}
3477   ins_pipe( pipe_slow );
3478 %}
3479 
3480 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
3481   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3482   match(Set dst (AddVF src1 src2));
3483   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
3484   ins_encode %{
3485     bool vector256 = false;
3486     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3487   %}
3488   ins_pipe( pipe_slow );
3489 %}
3490 
3491 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
3492   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3493   match(Set dst (AddVF src (LoadVector mem)));
3494   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
3495   ins_encode %{
3496     bool vector256 = false;
3497     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3498   %}
3499   ins_pipe( pipe_slow );
3500 %}
3501 
3502 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
3503   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3504   match(Set dst (AddVF src1 src2));
3505   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
3506   ins_encode %{
3507     bool vector256 = true;
3508     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3509   %}
3510   ins_pipe( pipe_slow );
3511 %}
3512 
3513 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
3514   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3515   match(Set dst (AddVF src (LoadVector mem)));
3516   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
3517   ins_encode %{
3518     bool vector256 = true;
3519     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















3520   %}
3521   ins_pipe( pipe_slow );
3522 %}
3523 
3524 // Doubles vector add
3525 instruct vadd2D(vecX dst, vecX src) %{
3526   predicate(n->as_Vector()->length() == 2);
3527   match(Set dst (AddVD dst src));
3528   format %{ "addpd   $dst,$src\t! add packed2D" %}
3529   ins_encode %{
3530     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
3531   %}
3532   ins_pipe( pipe_slow );
3533 %}
3534 
3535 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
3536   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3537   match(Set dst (AddVD src1 src2));
3538   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
3539   ins_encode %{
3540     bool vector256 = false;
3541     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3542   %}
3543   ins_pipe( pipe_slow );
3544 %}
3545 
3546 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
3547   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3548   match(Set dst (AddVD src (LoadVector mem)));
3549   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
3550   ins_encode %{
3551     bool vector256 = false;
3552     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3553   %}
3554   ins_pipe( pipe_slow );
3555 %}
3556 
3557 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
3558   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3559   match(Set dst (AddVD src1 src2));
3560   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
3561   ins_encode %{
3562     bool vector256 = true;
3563     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3564   %}
3565   ins_pipe( pipe_slow );
3566 %}
3567 
3568 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
3569   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3570   match(Set dst (AddVD src (LoadVector mem)));
3571   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
3572   ins_encode %{
3573     bool vector256 = true;
3574     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















3575   %}
3576   ins_pipe( pipe_slow );
3577 %}
3578 
3579 // --------------------------------- SUB --------------------------------------
3580 
3581 // Bytes vector sub
3582 instruct vsub4B(vecS dst, vecS src) %{
3583   predicate(n->as_Vector()->length() == 4);
3584   match(Set dst (SubVB dst src));
3585   format %{ "psubb   $dst,$src\t! sub packed4B" %}
3586   ins_encode %{
3587     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3588   %}
3589   ins_pipe( pipe_slow );
3590 %}
3591 
3592 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
3593   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3594   match(Set dst (SubVB src1 src2));
3595   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
3596   ins_encode %{
3597     bool vector256 = false;
3598     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3599   %}
3600   ins_pipe( pipe_slow );
3601 %}
3602 
3603 instruct vsub8B(vecD dst, vecD src) %{
3604   predicate(n->as_Vector()->length() == 8);
3605   match(Set dst (SubVB dst src));
3606   format %{ "psubb   $dst,$src\t! sub packed8B" %}
3607   ins_encode %{
3608     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3609   %}
3610   ins_pipe( pipe_slow );
3611 %}
3612 
3613 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
3614   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3615   match(Set dst (SubVB src1 src2));
3616   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
3617   ins_encode %{
3618     bool vector256 = false;
3619     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3620   %}
3621   ins_pipe( pipe_slow );
3622 %}
3623 
3624 instruct vsub16B(vecX dst, vecX src) %{
3625   predicate(n->as_Vector()->length() == 16);
3626   match(Set dst (SubVB dst src));
3627   format %{ "psubb   $dst,$src\t! sub packed16B" %}
3628   ins_encode %{
3629     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3630   %}
3631   ins_pipe( pipe_slow );
3632 %}
3633 
3634 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
3635   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3636   match(Set dst (SubVB src1 src2));
3637   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
3638   ins_encode %{
3639     bool vector256 = false;
3640     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3641   %}
3642   ins_pipe( pipe_slow );
3643 %}
3644 
3645 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
3646   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3647   match(Set dst (SubVB src (LoadVector mem)));
3648   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
3649   ins_encode %{
3650     bool vector256 = false;
3651     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3652   %}
3653   ins_pipe( pipe_slow );
3654 %}
3655 
3656 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
3657   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3658   match(Set dst (SubVB src1 src2));
3659   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
3660   ins_encode %{
3661     bool vector256 = true;
3662     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3663   %}
3664   ins_pipe( pipe_slow );
3665 %}
3666 
3667 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
3668   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3669   match(Set dst (SubVB src (LoadVector mem)));
3670   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
3671   ins_encode %{
3672     bool vector256 = true;
3673     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















3674   %}
3675   ins_pipe( pipe_slow );
3676 %}
3677 
3678 // Shorts/Chars vector sub
3679 instruct vsub2S(vecS dst, vecS src) %{
3680   predicate(n->as_Vector()->length() == 2);
3681   match(Set dst (SubVS dst src));
3682   format %{ "psubw   $dst,$src\t! sub packed2S" %}
3683   ins_encode %{
3684     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3685   %}
3686   ins_pipe( pipe_slow );
3687 %}
3688 
3689 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
3690   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3691   match(Set dst (SubVS src1 src2));
3692   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
3693   ins_encode %{
3694     bool vector256 = false;
3695     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3696   %}
3697   ins_pipe( pipe_slow );
3698 %}
3699 
3700 instruct vsub4S(vecD dst, vecD src) %{
3701   predicate(n->as_Vector()->length() == 4);
3702   match(Set dst (SubVS dst src));
3703   format %{ "psubw   $dst,$src\t! sub packed4S" %}
3704   ins_encode %{
3705     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3706   %}
3707   ins_pipe( pipe_slow );
3708 %}
3709 
3710 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
3711   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3712   match(Set dst (SubVS src1 src2));
3713   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
3714   ins_encode %{
3715     bool vector256 = false;
3716     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3717   %}
3718   ins_pipe( pipe_slow );
3719 %}
3720 
3721 instruct vsub8S(vecX dst, vecX src) %{
3722   predicate(n->as_Vector()->length() == 8);
3723   match(Set dst (SubVS dst src));
3724   format %{ "psubw   $dst,$src\t! sub packed8S" %}
3725   ins_encode %{
3726     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3727   %}
3728   ins_pipe( pipe_slow );
3729 %}
3730 
3731 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
3732   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3733   match(Set dst (SubVS src1 src2));
3734   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
3735   ins_encode %{
3736     bool vector256 = false;
3737     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3738   %}
3739   ins_pipe( pipe_slow );
3740 %}
3741 
3742 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
3743   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3744   match(Set dst (SubVS src (LoadVector mem)));
3745   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
3746   ins_encode %{
3747     bool vector256 = false;
3748     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3749   %}
3750   ins_pipe( pipe_slow );
3751 %}
3752 
3753 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
3754   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3755   match(Set dst (SubVS src1 src2));
3756   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
3757   ins_encode %{
3758     bool vector256 = true;
3759     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3760   %}
3761   ins_pipe( pipe_slow );
3762 %}
3763 
3764 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
3765   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3766   match(Set dst (SubVS src (LoadVector mem)));
3767   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
3768   ins_encode %{
3769     bool vector256 = true;
3770     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3771   %}
3772   ins_pipe( pipe_slow );
3773 %}
3774 
3775 // Integers vector sub
3776 instruct vsub2I(vecD dst, vecD src) %{
3777   predicate(n->as_Vector()->length() == 2);
3778   match(Set dst (SubVI dst src));
3779   format %{ "psubd   $dst,$src\t! sub packed2I" %}
3780   ins_encode %{
3781     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3782   %}
3783   ins_pipe( pipe_slow );
3784 %}
3785 
3786 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
3787   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3788   match(Set dst (SubVI src1 src2));
3789   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
3790   ins_encode %{
3791     bool vector256 = false;
3792     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3793   %}
3794   ins_pipe( pipe_slow );
3795 %}
3796 
3797 instruct vsub4I(vecX dst, vecX src) %{
3798   predicate(n->as_Vector()->length() == 4);
3799   match(Set dst (SubVI dst src));
3800   format %{ "psubd   $dst,$src\t! sub packed4I" %}
3801   ins_encode %{
3802     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3803   %}
3804   ins_pipe( pipe_slow );
3805 %}
3806 
3807 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
3808   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3809   match(Set dst (SubVI src1 src2));
3810   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
3811   ins_encode %{
3812     bool vector256 = false;
3813     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3814   %}
3815   ins_pipe( pipe_slow );
3816 %}
3817 
3818 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
3819   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3820   match(Set dst (SubVI src (LoadVector mem)));
3821   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
3822   ins_encode %{
3823     bool vector256 = false;
3824     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3825   %}
3826   ins_pipe( pipe_slow );
3827 %}
3828 
3829 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
3830   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3831   match(Set dst (SubVI src1 src2));
3832   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
3833   ins_encode %{
3834     bool vector256 = true;
3835     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3836   %}
3837   ins_pipe( pipe_slow );
3838 %}
3839 
3840 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
3841   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3842   match(Set dst (SubVI src (LoadVector mem)));
3843   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
3844   ins_encode %{
3845     bool vector256 = true;
3846     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3847   %}
3848   ins_pipe( pipe_slow );
3849 %}
3850 
3851 // Longs vector sub
3852 instruct vsub2L(vecX dst, vecX src) %{
3853   predicate(n->as_Vector()->length() == 2);
3854   match(Set dst (SubVL dst src));
3855   format %{ "psubq   $dst,$src\t! sub packed2L" %}
3856   ins_encode %{
3857     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
3858   %}
3859   ins_pipe( pipe_slow );
3860 %}
3861 
3862 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
3863   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3864   match(Set dst (SubVL src1 src2));
3865   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
3866   ins_encode %{
3867     bool vector256 = false;
3868     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3869   %}
3870   ins_pipe( pipe_slow );
3871 %}
3872 
3873 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
3874   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3875   match(Set dst (SubVL src (LoadVector mem)));
3876   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
3877   ins_encode %{
3878     bool vector256 = false;
3879     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3880   %}
3881   ins_pipe( pipe_slow );
3882 %}
3883 
3884 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
3885   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3886   match(Set dst (SubVL src1 src2));
3887   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
3888   ins_encode %{
3889     bool vector256 = true;
3890     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3891   %}
3892   ins_pipe( pipe_slow );
3893 %}
3894 
3895 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
3896   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3897   match(Set dst (SubVL src (LoadVector mem)));
3898   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
3899   ins_encode %{
3900     bool vector256 = true;
3901     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3902   %}
3903   ins_pipe( pipe_slow );
3904 %}
3905 
3906 // Floats vector sub
3907 instruct vsub2F(vecD dst, vecD src) %{
3908   predicate(n->as_Vector()->length() == 2);
3909   match(Set dst (SubVF dst src));
3910   format %{ "subps   $dst,$src\t! sub packed2F" %}
3911   ins_encode %{
3912     __ subps($dst$$XMMRegister, $src$$XMMRegister);
3913   %}
3914   ins_pipe( pipe_slow );
3915 %}
3916 
3917 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
3918   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3919   match(Set dst (SubVF src1 src2));
3920   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
3921   ins_encode %{
3922     bool vector256 = false;
3923     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3924   %}
3925   ins_pipe( pipe_slow );
3926 %}
3927 
3928 instruct vsub4F(vecX dst, vecX src) %{
3929   predicate(n->as_Vector()->length() == 4);
3930   match(Set dst (SubVF dst src));
3931   format %{ "subps   $dst,$src\t! sub packed4F" %}
3932   ins_encode %{
3933     __ subps($dst$$XMMRegister, $src$$XMMRegister);
3934   %}
3935   ins_pipe( pipe_slow );
3936 %}
3937 
3938 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
3939   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3940   match(Set dst (SubVF src1 src2));
3941   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
3942   ins_encode %{
3943     bool vector256 = false;
3944     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3945   %}
3946   ins_pipe( pipe_slow );
3947 %}
3948 
3949 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
3950   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3951   match(Set dst (SubVF src (LoadVector mem)));
3952   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
3953   ins_encode %{
3954     bool vector256 = false;
3955     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3956   %}
3957   ins_pipe( pipe_slow );
3958 %}
3959 
3960 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
3961   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3962   match(Set dst (SubVF src1 src2));
3963   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
3964   ins_encode %{
3965     bool vector256 = true;
3966     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3967   %}
3968   ins_pipe( pipe_slow );
3969 %}
3970 
3971 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
3972   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3973   match(Set dst (SubVF src (LoadVector mem)));
3974   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
3975   ins_encode %{
3976     bool vector256 = true;
3977     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3978   %}
3979   ins_pipe( pipe_slow );
3980 %}
3981 
3982 // Doubles vector sub
3983 instruct vsub2D(vecX dst, vecX src) %{
3984   predicate(n->as_Vector()->length() == 2);
3985   match(Set dst (SubVD dst src));
3986   format %{ "subpd   $dst,$src\t! sub packed2D" %}
3987   ins_encode %{
3988     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
3989   %}
3990   ins_pipe( pipe_slow );
3991 %}
3992 
3993 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
3994   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3995   match(Set dst (SubVD src1 src2));
3996   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
3997   ins_encode %{
3998     bool vector256 = false;
3999     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4000   %}
4001   ins_pipe( pipe_slow );
4002 %}
4003 
4004 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
4005   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4006   match(Set dst (SubVD src (LoadVector mem)));
4007   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
4008   ins_encode %{
4009     bool vector256 = false;
4010     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4011   %}
4012   ins_pipe( pipe_slow );
4013 %}
4014 
4015 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
4016   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4017   match(Set dst (SubVD src1 src2));
4018   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
4019   ins_encode %{
4020     bool vector256 = true;
4021     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4022   %}
4023   ins_pipe( pipe_slow );
4024 %}
4025 
4026 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
4027   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4028   match(Set dst (SubVD src (LoadVector mem)));
4029   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
4030   ins_encode %{
4031     bool vector256 = true;
4032     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4033   %}
4034   ins_pipe( pipe_slow );
4035 %}
4036 
4037 // --------------------------------- MUL --------------------------------------
4038 
4039 // Shorts/Chars vector mul
4040 instruct vmul2S(vecS dst, vecS src) %{
4041   predicate(n->as_Vector()->length() == 2);
4042   match(Set dst (MulVS dst src));
4043   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
4044   ins_encode %{
4045     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
4046   %}
4047   ins_pipe( pipe_slow );
4048 %}
4049 
4050 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
4051   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4052   match(Set dst (MulVS src1 src2));
4053   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
4054   ins_encode %{
4055     bool vector256 = false;
4056     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4057   %}
4058   ins_pipe( pipe_slow );
4059 %}
4060 
4061 instruct vmul4S(vecD dst, vecD src) %{
4062   predicate(n->as_Vector()->length() == 4);
4063   match(Set dst (MulVS dst src));
4064   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
4065   ins_encode %{
4066     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
4067   %}
4068   ins_pipe( pipe_slow );
4069 %}
4070 
4071 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
4072   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4073   match(Set dst (MulVS src1 src2));
4074   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
4075   ins_encode %{
4076     bool vector256 = false;
4077     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4078   %}
4079   ins_pipe( pipe_slow );
4080 %}
4081 
4082 instruct vmul8S(vecX dst, vecX src) %{
4083   predicate(n->as_Vector()->length() == 8);
4084   match(Set dst (MulVS dst src));
4085   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
4086   ins_encode %{
4087     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
4088   %}
4089   ins_pipe( pipe_slow );
4090 %}
4091 
4092 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
4093   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4094   match(Set dst (MulVS src1 src2));
4095   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
4096   ins_encode %{
4097     bool vector256 = false;
4098     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4099   %}
4100   ins_pipe( pipe_slow );
4101 %}
4102 
4103 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
4104   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4105   match(Set dst (MulVS src (LoadVector mem)));
4106   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
4107   ins_encode %{
4108     bool vector256 = false;
4109     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4110   %}
4111   ins_pipe( pipe_slow );
4112 %}
4113 
4114 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
4115   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4116   match(Set dst (MulVS src1 src2));
4117   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
4118   ins_encode %{
4119     bool vector256 = true;
4120     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4121   %}
4122   ins_pipe( pipe_slow );
4123 %}
4124 
4125 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
4126   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4127   match(Set dst (MulVS src (LoadVector mem)));
4128   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
4129   ins_encode %{
4130     bool vector256 = true;
4131     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4132   %}
4133   ins_pipe( pipe_slow );
4134 %}
4135 
4136 // Integers vector mul (sse4_1)
4137 instruct vmul2I(vecD dst, vecD src) %{
4138   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
4139   match(Set dst (MulVI dst src));
4140   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
4141   ins_encode %{
4142     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
4143   %}
4144   ins_pipe( pipe_slow );
4145 %}
4146 
4147 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
4148   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4149   match(Set dst (MulVI src1 src2));
4150   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
4151   ins_encode %{
4152     bool vector256 = false;
4153     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4154   %}
4155   ins_pipe( pipe_slow );
4156 %}
4157 
4158 instruct vmul4I(vecX dst, vecX src) %{
4159   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
4160   match(Set dst (MulVI dst src));
4161   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
4162   ins_encode %{
4163     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
4164   %}
4165   ins_pipe( pipe_slow );
4166 %}
4167 
4168 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
4169   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4170   match(Set dst (MulVI src1 src2));
4171   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
4172   ins_encode %{
4173     bool vector256 = false;
4174     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4175   %}
4176   ins_pipe( pipe_slow );
4177 %}
4178 
4179 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
4180   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4181   match(Set dst (MulVI src (LoadVector mem)));
4182   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
4183   ins_encode %{
4184     bool vector256 = false;
4185     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4186   %}
4187   ins_pipe( pipe_slow );
4188 %}
4189 
4190 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
4191   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4192   match(Set dst (MulVI src1 src2));
4193   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
4194   ins_encode %{
4195     bool vector256 = true;
4196     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4197   %}
4198   ins_pipe( pipe_slow );
4199 %}
4200 
4201 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
4202   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4203   match(Set dst (MulVI src (LoadVector mem)));
4204   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
4205   ins_encode %{
4206     bool vector256 = true;
4207     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4208   %}
4209   ins_pipe( pipe_slow );
4210 %}
4211 
4212 // Floats vector mul
4213 instruct vmul2F(vecD dst, vecD src) %{
4214   predicate(n->as_Vector()->length() == 2);
4215   match(Set dst (MulVF dst src));
4216   format %{ "mulps   $dst,$src\t! mul packed2F" %}
4217   ins_encode %{
4218     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
4219   %}
4220   ins_pipe( pipe_slow );
4221 %}
4222 
4223 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
4224   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4225   match(Set dst (MulVF src1 src2));
4226   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
4227   ins_encode %{
4228     bool vector256 = false;
4229     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4230   %}
4231   ins_pipe( pipe_slow );
4232 %}
4233 
4234 instruct vmul4F(vecX dst, vecX src) %{
4235   predicate(n->as_Vector()->length() == 4);
4236   match(Set dst (MulVF dst src));
4237   format %{ "mulps   $dst,$src\t! mul packed4F" %}
4238   ins_encode %{
4239     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
4240   %}
4241   ins_pipe( pipe_slow );
4242 %}
4243 
4244 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
4245   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4246   match(Set dst (MulVF src1 src2));
4247   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
4248   ins_encode %{
4249     bool vector256 = false;
4250     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4251   %}
4252   ins_pipe( pipe_slow );
4253 %}
4254 
4255 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
4256   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4257   match(Set dst (MulVF src (LoadVector mem)));
4258   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
4259   ins_encode %{
4260     bool vector256 = false;
4261     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4262   %}
4263   ins_pipe( pipe_slow );
4264 %}
4265 
4266 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
4267   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4268   match(Set dst (MulVF src1 src2));
4269   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
4270   ins_encode %{
4271     bool vector256 = true;
4272     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4273   %}
4274   ins_pipe( pipe_slow );
4275 %}
4276 
4277 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
4278   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4279   match(Set dst (MulVF src (LoadVector mem)));
4280   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
4281   ins_encode %{
4282     bool vector256 = true;
4283     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4284   %}
4285   ins_pipe( pipe_slow );
4286 %}
4287 
4288 // Doubles vector mul
4289 instruct vmul2D(vecX dst, vecX src) %{
4290   predicate(n->as_Vector()->length() == 2);
4291   match(Set dst (MulVD dst src));
4292   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
4293   ins_encode %{
4294     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
4295   %}
4296   ins_pipe( pipe_slow );
4297 %}
4298 
4299 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
4300   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4301   match(Set dst (MulVD src1 src2));
4302   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
4303   ins_encode %{
4304     bool vector256 = false;
4305     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4306   %}
4307   ins_pipe( pipe_slow );
4308 %}
4309 
4310 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
4311   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4312   match(Set dst (MulVD src (LoadVector mem)));
4313   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
4314   ins_encode %{
4315     bool vector256 = false;
4316     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4317   %}
4318   ins_pipe( pipe_slow );
4319 %}
4320 
4321 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
4322   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4323   match(Set dst (MulVD src1 src2));
4324   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
4325   ins_encode %{
4326     bool vector256 = true;
4327     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4328   %}
4329   ins_pipe( pipe_slow );
4330 %}
4331 
4332 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
4333   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4334   match(Set dst (MulVD src (LoadVector mem)));
4335   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
4336   ins_encode %{
4337     bool vector256 = true;
4338     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4339   %}
4340   ins_pipe( pipe_slow );
4341 %}
4342 
4343 // --------------------------------- DIV --------------------------------------
4344 
4345 // Floats vector div
4346 instruct vdiv2F(vecD dst, vecD src) %{
4347   predicate(n->as_Vector()->length() == 2);
4348   match(Set dst (DivVF dst src));
4349   format %{ "divps   $dst,$src\t! div packed2F" %}
4350   ins_encode %{
4351     __ divps($dst$$XMMRegister, $src$$XMMRegister);
4352   %}
4353   ins_pipe( pipe_slow );
4354 %}
4355 
4356 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
4357   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4358   match(Set dst (DivVF src1 src2));
4359   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
4360   ins_encode %{
4361     bool vector256 = false;
4362     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4363   %}
4364   ins_pipe( pipe_slow );
4365 %}
4366 
4367 instruct vdiv4F(vecX dst, vecX src) %{
4368   predicate(n->as_Vector()->length() == 4);
4369   match(Set dst (DivVF dst src));
4370   format %{ "divps   $dst,$src\t! div packed4F" %}
4371   ins_encode %{
4372     __ divps($dst$$XMMRegister, $src$$XMMRegister);
4373   %}
4374   ins_pipe( pipe_slow );
4375 %}
4376 
4377 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
4378   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4379   match(Set dst (DivVF src1 src2));
4380   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
4381   ins_encode %{
4382     bool vector256 = false;
4383     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4384   %}
4385   ins_pipe( pipe_slow );
4386 %}
4387 
4388 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
4389   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4390   match(Set dst (DivVF src (LoadVector mem)));
4391   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
4392   ins_encode %{
4393     bool vector256 = false;
4394     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4395   %}
4396   ins_pipe( pipe_slow );
4397 %}
4398 
4399 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
4400   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4401   match(Set dst (DivVF src1 src2));
4402   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
4403   ins_encode %{
4404     bool vector256 = true;
4405     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4406   %}
4407   ins_pipe( pipe_slow );
4408 %}
4409 
4410 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
4411   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4412   match(Set dst (DivVF src (LoadVector mem)));
4413   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
4414   ins_encode %{
4415     bool vector256 = true;
4416     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4417   %}
4418   ins_pipe( pipe_slow );
4419 %}
4420 
4421 // Doubles vector div
4422 instruct vdiv2D(vecX dst, vecX src) %{
4423   predicate(n->as_Vector()->length() == 2);
4424   match(Set dst (DivVD dst src));
4425   format %{ "divpd   $dst,$src\t! div packed2D" %}
4426   ins_encode %{
4427     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
4428   %}
4429   ins_pipe( pipe_slow );
4430 %}
4431 
4432 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
4433   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4434   match(Set dst (DivVD src1 src2));
4435   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
4436   ins_encode %{
4437     bool vector256 = false;
4438     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4439   %}
4440   ins_pipe( pipe_slow );
4441 %}
4442 
4443 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
4444   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4445   match(Set dst (DivVD src (LoadVector mem)));
4446   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
4447   ins_encode %{
4448     bool vector256 = false;
4449     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4450   %}
4451   ins_pipe( pipe_slow );
4452 %}
4453 
4454 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
4455   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4456   match(Set dst (DivVD src1 src2));
4457   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
4458   ins_encode %{
4459     bool vector256 = true;
4460     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4461   %}
4462   ins_pipe( pipe_slow );
4463 %}
4464 
4465 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
4466   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4467   match(Set dst (DivVD src (LoadVector mem)));
4468   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
4469   ins_encode %{
4470     bool vector256 = true;
4471     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4472   %}
4473   ins_pipe( pipe_slow );
4474 %}
4475 
4476 // ------------------------------ Shift ---------------------------------------
4477 
4478 // Left and right shift count vectors are the same on x86
4479 // (only the lowest bits of the xmm register are used for the count).
4480 instruct vshiftcnt(vecS dst, rRegI cnt) %{
4481   match(Set dst (LShiftCntV cnt));
4482   match(Set dst (RShiftCntV cnt));
4483   format %{ "movd    $dst,$cnt\t! load shift count" %}
4484   ins_encode %{
4485     __ movdl($dst$$XMMRegister, $cnt$$Register);
4486   %}
4487   ins_pipe( pipe_slow );
4488 %}
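
As a side note on the comment above: the packed-shift instructions that take an XMM count (psllw, psrlw, and friends) read the count from the low bits of that register, which is why a single movdl-loaded register can serve as the count for both left and right shifts. Below is a minimal standalone C++ sketch using SSE2 intrinsics (illustrative only, not part of x86.ad) showing that behavior.

    #include <emmintrin.h>   // SSE2 intrinsics
    #include <cstdio>

    int main() {
      __m128i v   = _mm_set1_epi16(0x0101);   // eight 16-bit lanes
      __m128i cnt = _mm_cvtsi32_si128(3);     // like movdl: count sits in the low dword
      __m128i l   = _mm_sll_epi16(v, cnt);    // psllw xmm, xmm
      __m128i r   = _mm_srl_epi16(v, cnt);    // psrlw xmm, xmm
      printf("left lane0=%#x right lane0=%#x\n",
             _mm_extract_epi16(l, 0), _mm_extract_epi16(r, 0));  // 0x808 and 0x20
      return 0;
    }
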
4489 
4490 // ------------------------------ LeftShift -----------------------------------
4491 
4492 // Shorts/Chars vector left shift
4493 instruct vsll2S(vecS dst, vecS shift) %{
4494   predicate(n->as_Vector()->length() == 2);
4495   match(Set dst (LShiftVS dst shift));
4496   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
4497   ins_encode %{
4498     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
4499   %}
4500   ins_pipe( pipe_slow );
4501 %}
4502 
4503 instruct vsll2S_imm(vecS dst, immI8 shift) %{
4504   predicate(n->as_Vector()->length() == 2);
4505   match(Set dst (LShiftVS dst shift));
4506   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
4507   ins_encode %{
4508     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
4509   %}
4510   ins_pipe( pipe_slow );
4511 %}
4512 
4513 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
4514   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4515   match(Set dst (LShiftVS src shift));
4516   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
4517   ins_encode %{
4518     bool vector256 = false;
4519     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4520   %}
4521   ins_pipe( pipe_slow );
4522 %}
4523 
4524 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4525   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4526   match(Set dst (LShiftVS src shift));
4527   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
4528   ins_encode %{
4529     bool vector256 = false;
4530     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4531   %}
4532   ins_pipe( pipe_slow );
4533 %}
4534 
4535 instruct vsll4S(vecD dst, vecS shift) %{
4536   predicate(n->as_Vector()->length() == 4);
4537   match(Set dst (LShiftVS dst shift));
4538   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
4539   ins_encode %{
4540     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
4541   %}
4542   ins_pipe( pipe_slow );
4543 %}
4544 
4545 instruct vsll4S_imm(vecD dst, immI8 shift) %{
4546   predicate(n->as_Vector()->length() == 4);
4547   match(Set dst (LShiftVS dst shift));
4548   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
4549   ins_encode %{
4550     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
4551   %}
4552   ins_pipe( pipe_slow );
4553 %}
4554 
4555 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
4556   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4557   match(Set dst (LShiftVS src shift));
4558   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
4559   ins_encode %{
4560     bool vector256 = false;
4561     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4562   %}
4563   ins_pipe( pipe_slow );
4564 %}
4565 
4566 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4567   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4568   match(Set dst (LShiftVS src shift));
4569   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
4570   ins_encode %{
4571     bool vector256 = false;
4572     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4573   %}
4574   ins_pipe( pipe_slow );
4575 %}
4576 
4577 instruct vsll8S(vecX dst, vecS shift) %{
4578   predicate(n->as_Vector()->length() == 8);
4579   match(Set dst (LShiftVS dst shift));
4580   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
4581   ins_encode %{
4582     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
4583   %}
4584   ins_pipe( pipe_slow );
4585 %}
4586 
4587 instruct vsll8S_imm(vecX dst, immI8 shift) %{
4588   predicate(n->as_Vector()->length() == 8);
4589   match(Set dst (LShiftVS dst shift));
4590   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
4591   ins_encode %{
4592     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
4593   %}
4594   ins_pipe( pipe_slow );
4595 %}
4596 
4597 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
4598   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4599   match(Set dst (LShiftVS src shift));
4600   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
4601   ins_encode %{
4602     bool vector256 = false;
4603     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4604   %}
4605   ins_pipe( pipe_slow );
4606 %}
4607 
4608 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4609   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4610   match(Set dst (LShiftVS src shift));
4611   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
4612   ins_encode %{
4613     bool vector256 = false;
4614     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4615   %}
4616   ins_pipe( pipe_slow );
4617 %}
4618 
4619 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
4620   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4621   match(Set dst (LShiftVS src shift));
4622   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
4623   ins_encode %{
4624     bool vector256 = true;
4625     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4626   %}
4627   ins_pipe( pipe_slow );
4628 %}
4629 
4630 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4631   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4632   match(Set dst (LShiftVS src shift));
4633   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
4634   ins_encode %{
4635     bool vector256 = true;
4636     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4637   %}
4638   ins_pipe( pipe_slow );
4639 %}
4640 
4641 // Integers vector left shift
4642 instruct vsll2I(vecD dst, vecS shift) %{
4643   predicate(n->as_Vector()->length() == 2);
4644   match(Set dst (LShiftVI dst shift));
4645   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
4646   ins_encode %{
4647     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
4648   %}
4649   ins_pipe( pipe_slow );
4650 %}
4651 
4652 instruct vsll2I_imm(vecD dst, immI8 shift) %{
4653   predicate(n->as_Vector()->length() == 2);
4654   match(Set dst (LShiftVI dst shift));
4655   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
4656   ins_encode %{
4657     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
4658   %}
4659   ins_pipe( pipe_slow );
4660 %}
4661 
4662 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
4663   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4664   match(Set dst (LShiftVI src shift));
4665   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
4666   ins_encode %{
4667     bool vector256 = false;
4668     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4669   %}
4670   ins_pipe( pipe_slow );
4671 %}
4672 
4673 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4674   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4675   match(Set dst (LShiftVI src shift));
4676   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
4677   ins_encode %{
4678     bool vector256 = false;
4679     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4680   %}
4681   ins_pipe( pipe_slow );
4682 %}
4683 
4684 instruct vsll4I(vecX dst, vecS shift) %{
4685   predicate(n->as_Vector()->length() == 4);
4686   match(Set dst (LShiftVI dst shift));
4687   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
4688   ins_encode %{
4689     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
4690   %}
4691   ins_pipe( pipe_slow );
4692 %}
4693 
4694 instruct vsll4I_imm(vecX dst, immI8 shift) %{
4695   predicate(n->as_Vector()->length() == 4);
4696   match(Set dst (LShiftVI dst shift));
4697   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
4698   ins_encode %{
4699     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
4700   %}
4701   ins_pipe( pipe_slow );
4702 %}
4703 
4704 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
4705   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4706   match(Set dst (LShiftVI src shift));
4707   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
4708   ins_encode %{
4709     bool vector256 = false;
4710     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4711   %}
4712   ins_pipe( pipe_slow );
4713 %}
4714 
4715 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4716   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4717   match(Set dst (LShiftVI src shift));
4718   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
4719   ins_encode %{
4720     bool vector256 = false;
4721     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4722   %}
4723   ins_pipe( pipe_slow );
4724 %}
4725 
4726 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
4727   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4728   match(Set dst (LShiftVI src shift));
4729   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
4730   ins_encode %{
4731     bool vector256 = true;
4732     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4733   %}
4734   ins_pipe( pipe_slow );
4735 %}
4736 
4737 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4738   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4739   match(Set dst (LShiftVI src shift));
4740   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
4741   ins_encode %{
4742     bool vector256 = true;
4743     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4744   %}
4745   ins_pipe( pipe_slow );
4746 %}
4747 
4748 // Longs vector left shift
4749 instruct vsll2L(vecX dst, vecS shift) %{
4750   predicate(n->as_Vector()->length() == 2);
4751   match(Set dst (LShiftVL dst shift));
4752   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4753   ins_encode %{
4754     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
4755   %}
4756   ins_pipe( pipe_slow );
4757 %}
4758 
4759 instruct vsll2L_imm(vecX dst, immI8 shift) %{
4760   predicate(n->as_Vector()->length() == 2);
4761   match(Set dst (LShiftVL dst shift));
4762   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4763   ins_encode %{
4764     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
4765   %}
4766   ins_pipe( pipe_slow );
4767 %}
4768 
4769 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
4770   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4771   match(Set dst (LShiftVL src shift));
4772   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4773   ins_encode %{
4774     bool vector256 = false;
4775     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4776   %}
4777   ins_pipe( pipe_slow );
4778 %}
4779 
4780 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4781   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4782   match(Set dst (LShiftVL src shift));
4783   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4784   ins_encode %{
4785     bool vector256 = false;
4786     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4787   %}
4788   ins_pipe( pipe_slow );
4789 %}
4790 
4791 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
4792   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4793   match(Set dst (LShiftVL src shift));
4794   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4795   ins_encode %{
4796     bool vector256 = true;
4797     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4798   %}
4799   ins_pipe( pipe_slow );
4800 %}
4801 
4802 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4803   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4804   match(Set dst (LShiftVL src shift));
4805   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4806   ins_encode %{
4807     bool vector256 = true;
4808     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4809   %}
4810   ins_pipe( pipe_slow );
4811 %}
4812 
4813 // ----------------------- LogicalRightShift -----------------------------------
4814 
4815 // Shorts vector logical right shift produces an incorrect Java result
4816 // for negative data because Java code converts the short value into an int
4817 // with sign extension before the shift. But char vectors are fine since
4818 // chars are unsigned values.
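
To make the distinction above concrete, here is a small standalone C++ sketch (illustrative only, not part of x86.ad) comparing the Java scalar result of (short)(s >>> k), which shifts the sign-extended 32-bit value, against what a single 16-bit psrlw lane computes; for char the two agree.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t  s = -2;        // Java short
      uint16_t c = 0xFFFEu;   // Java char with the same bit pattern
      unsigned scalar_short = (uint16_t)((uint32_t)(int32_t)s >> 1);  // (short)(s >>> 1)
      unsigned scalar_char  = (uint16_t)((uint32_t)c >> 1);           // (char)(c >>> 1)
      unsigned packed_lane  = (uint16_t)s >> 1;                       // one psrlw lane
      printf("short: scalar=%#x packed=%#x\n", scalar_short, packed_lane);  // 0xffff vs 0x7fff
      printf("char : scalar=%#x packed=%#x\n", scalar_char,  packed_lane);  // 0x7fff vs 0x7fff
      return 0;
    }
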
4819 
4820 instruct vsrl2S(vecS dst, vecS shift) %{
4821   predicate(n->as_Vector()->length() == 2);
4822   match(Set dst (URShiftVS dst shift));
4823   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
4824   ins_encode %{
4825     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4826   %}
4827   ins_pipe( pipe_slow );
4828 %}
4829 
4830 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
4831   predicate(n->as_Vector()->length() == 2);
4832   match(Set dst (URShiftVS dst shift));
4833   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
4834   ins_encode %{
4835     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4836   %}
4837   ins_pipe( pipe_slow );
4838 %}
4839 
4840 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
4841   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4842   match(Set dst (URShiftVS src shift));
4843   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
4844   ins_encode %{
4845     bool vector256 = false;
4846     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4847   %}
4848   ins_pipe( pipe_slow );
4849 %}
4850 
4851 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4852   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4853   match(Set dst (URShiftVS src shift));
4854   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
4855   ins_encode %{
4856     bool vector256 = false;
4857     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4858   %}
4859   ins_pipe( pipe_slow );
4860 %}
4861 
4862 instruct vsrl4S(vecD dst, vecS shift) %{
4863   predicate(n->as_Vector()->length() == 4);
4864   match(Set dst (URShiftVS dst shift));
4865   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
4866   ins_encode %{
4867     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4868   %}
4869   ins_pipe( pipe_slow );
4870 %}
4871 
4872 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
4873   predicate(n->as_Vector()->length() == 4);
4874   match(Set dst (URShiftVS dst shift));
4875   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
4876   ins_encode %{
4877     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4878   %}
4879   ins_pipe( pipe_slow );
4880 %}
4881 
4882 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
4883   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4884   match(Set dst (URShiftVS src shift));
4885   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
4886   ins_encode %{
4887     bool vector256 = false;
4888     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4889   %}
4890   ins_pipe( pipe_slow );
4891 %}
4892 
4893 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4894   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4895   match(Set dst (URShiftVS src shift));
4896   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
4897   ins_encode %{
4898     bool vector256 = false;
4899     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4900   %}
4901   ins_pipe( pipe_slow );
4902 %}
4903 
4904 instruct vsrl8S(vecX dst, vecS shift) %{
4905   predicate(n->as_Vector()->length() == 8);
4906   match(Set dst (URShiftVS dst shift));
4907   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
4908   ins_encode %{
4909     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4910   %}
4911   ins_pipe( pipe_slow );
4912 %}
4913 
4914 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
4915   predicate(n->as_Vector()->length() == 8);
4916   match(Set dst (URShiftVS dst shift));
4917   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
4918   ins_encode %{
4919     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4920   %}
4921   ins_pipe( pipe_slow );
4922 %}
4923 
4924 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
4925   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4926   match(Set dst (URShiftVS src shift));
4927   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
4928   ins_encode %{
4929     bool vector256 = false;
4930     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4931   %}
4932   ins_pipe( pipe_slow );
4933 %}
4934 
4935 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4936   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4937   match(Set dst (URShiftVS src shift));
4938   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
4939   ins_encode %{
4940     bool vector256 = false;
4941     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4942   %}
4943   ins_pipe( pipe_slow );
4944 %}
4945 
4946 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
4947   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4948   match(Set dst (URShiftVS src shift));
4949   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
4950   ins_encode %{
4951     bool vector256 = true;
4952     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4953   %}
4954   ins_pipe( pipe_slow );
4955 %}
4956 
4957 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4958   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4959   match(Set dst (URShiftVS src shift));
4960   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
4961   ins_encode %{
4962     bool vector256 = true;
4963     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4964   %}
4965   ins_pipe( pipe_slow );
4966 %}
4967 
4968 // Integers vector logical right shift
4969 instruct vsrl2I(vecD dst, vecS shift) %{
4970   predicate(n->as_Vector()->length() == 2);
4971   match(Set dst (URShiftVI dst shift));
4972   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4973   ins_encode %{
4974     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4975   %}
4976   ins_pipe( pipe_slow );
4977 %}
4978 
4979 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
4980   predicate(n->as_Vector()->length() == 2);
4981   match(Set dst (URShiftVI dst shift));
4982   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4983   ins_encode %{
4984     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4985   %}
4986   ins_pipe( pipe_slow );
4987 %}
4988 
4989 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
4990   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4991   match(Set dst (URShiftVI src shift));
4992   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
4993   ins_encode %{
4994     bool vector256 = false;
4995     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4996   %}
4997   ins_pipe( pipe_slow );
4998 %}
4999 
5000 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
5001   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5002   match(Set dst (URShiftVI src shift));
5003   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
5004   ins_encode %{
5005     bool vector256 = false;
5006     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5007   %}
5008   ins_pipe( pipe_slow );
5009 %}
5010 
5011 instruct vsrl4I(vecX dst, vecS shift) %{
5012   predicate(n->as_Vector()->length() == 4);
5013   match(Set dst (URShiftVI dst shift));
5014   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
5015   ins_encode %{
5016     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
5017   %}
5018   ins_pipe( pipe_slow );
5019 %}
5020 
5021 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
5022   predicate(n->as_Vector()->length() == 4);
5023   match(Set dst (URShiftVI dst shift));
5024   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
5025   ins_encode %{
5026     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
5027   %}
5028   ins_pipe( pipe_slow );
5029 %}
5030 
5031 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
5032   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5033   match(Set dst (URShiftVI src shift));
5034   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
5035   ins_encode %{
5036     bool vector256 = false;
5037     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5038   %}
5039   ins_pipe( pipe_slow );
5040 %}
5041 
5042 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
5043   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5044   match(Set dst (URShiftVI src shift));
5045   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
5046   ins_encode %{
5047     bool vector256 = false;
5048     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5049   %}
5050   ins_pipe( pipe_slow );
5051 %}
5052 
5053 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
5054   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5055   match(Set dst (URShiftVI src shift));
5056   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
5057   ins_encode %{
5058     bool vector256 = true;
5059     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5060   %}
5061   ins_pipe( pipe_slow );
5062 %}
5063 
5064 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
5065   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5066   match(Set dst (URShiftVI src shift));
5067   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
5068   ins_encode %{
5069     bool vector256 = true;
5070     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5071   %}
5072   ins_pipe( pipe_slow );
5073 %}
5074 
5075 // Longs vector logical right shift
5076 instruct vsrl2L(vecX dst, vecS shift) %{
5077   predicate(n->as_Vector()->length() == 2);
5078   match(Set dst (URShiftVL dst shift));
5079   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
5080   ins_encode %{
5081     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
5082   %}
5083   ins_pipe( pipe_slow );
5084 %}
5085 
5086 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
5087   predicate(n->as_Vector()->length() == 2);
5088   match(Set dst (URShiftVL dst shift));
5089   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
5090   ins_encode %{
5091     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
5092   %}
5093   ins_pipe( pipe_slow );
5094 %}
5095 
5096 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
5097   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5098   match(Set dst (URShiftVL src shift));
5099   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
5100   ins_encode %{
5101     bool vector256 = false;
5102     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5103   %}
5104   ins_pipe( pipe_slow );
5105 %}
5106 
5107 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
5108   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5109   match(Set dst (URShiftVL src shift));
5110   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
5111   ins_encode %{
5112     bool vector256 = false;
5113     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5114   %}
5115   ins_pipe( pipe_slow );
5116 %}
5117 
5118 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
5119   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5120   match(Set dst (URShiftVL src shift));
5121   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
5122   ins_encode %{
5123     bool vector256 = true;
5124     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5125   %}
5126   ins_pipe( pipe_slow );
5127 %}
5128 
5129 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
5130   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5131   match(Set dst (URShiftVL src shift));
5132   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
5133   ins_encode %{
5134     bool vector256 = true;
5135     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5136   %}
5137   ins_pipe( pipe_slow );
5138 %}
5139 
5140 // ------------------- ArithmeticRightShift -----------------------------------
5141 
5142 // Shorts/Chars vector arithmetic right shift
5143 instruct vsra2S(vecS dst, vecS shift) %{
5144   predicate(n->as_Vector()->length() == 2);
5145   match(Set dst (RShiftVS dst shift));
5146   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
5147   ins_encode %{
5148     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
5149   %}
5150   ins_pipe( pipe_slow );
5151 %}
5152 
5153 instruct vsra2S_imm(vecS dst, immI8 shift) %{
5154   predicate(n->as_Vector()->length() == 2);
5155   match(Set dst (RShiftVS dst shift));
5156   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
5157   ins_encode %{
5158     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
5159   %}
5160   ins_pipe( pipe_slow );
5161 %}
5162 
5163 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
5164   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5165   match(Set dst (RShiftVS src shift));
5166   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
5167   ins_encode %{
5168     bool vector256 = false;
5169     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5170   %}
5171   ins_pipe( pipe_slow );
5172 %}
5173 
5174 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
5175   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5176   match(Set dst (RShiftVS src shift));
5177   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
5178   ins_encode %{
5179     bool vector256 = false;
5180     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5181   %}
5182   ins_pipe( pipe_slow );
5183 %}
5184 
5185 instruct vsra4S(vecD dst, vecS shift) %{
5186   predicate(n->as_Vector()->length() == 4);
5187   match(Set dst (RShiftVS dst shift));
5188   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
5189   ins_encode %{
5190     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
5191   %}
5192   ins_pipe( pipe_slow );
5193 %}
5194 
5195 instruct vsra4S_imm(vecD dst, immI8 shift) %{
5196   predicate(n->as_Vector()->length() == 4);
5197   match(Set dst (RShiftVS dst shift));
5198   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
5199   ins_encode %{
5200     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
5201   %}
5202   ins_pipe( pipe_slow );
5203 %}
5204 
5205 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
5206   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5207   match(Set dst (RShiftVS src shift));
5208   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
5209   ins_encode %{
5210     bool vector256 = false;
5211     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5212   %}
5213   ins_pipe( pipe_slow );
5214 %}
5215 
5216 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
5217   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5218   match(Set dst (RShiftVS src shift));
5219   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
5220   ins_encode %{
5221     bool vector256 = false;
5222     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5223   %}
5224   ins_pipe( pipe_slow );
5225 %}
5226 
5227 instruct vsra8S(vecX dst, vecS shift) %{
5228   predicate(n->as_Vector()->length() == 8);
5229   match(Set dst (RShiftVS dst shift));
5230   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
5231   ins_encode %{
5232     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
5233   %}
5234   ins_pipe( pipe_slow );
5235 %}
5236 
5237 instruct vsra8S_imm(vecX dst, immI8 shift) %{
5238   predicate(n->as_Vector()->length() == 8);
5239   match(Set dst (RShiftVS dst shift));
5240   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
5241   ins_encode %{
5242     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
5243   %}
5244   ins_pipe( pipe_slow );
5245 %}
5246 
5247 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
5248   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5249   match(Set dst (RShiftVS src shift));
5250   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
5251   ins_encode %{
5252     bool vector256 = false;
5253     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5254   %}
5255   ins_pipe( pipe_slow );
5256 %}
5257 
5258 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
5259   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5260   match(Set dst (RShiftVS src shift));
5261   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
5262   ins_encode %{
5263     bool vector256 = false;
5264     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5265   %}
5266   ins_pipe( pipe_slow );
5267 %}
5268 
5269 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
5270   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5271   match(Set dst (RShiftVS src shift));
5272   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
5273   ins_encode %{
5274     bool vector256 = true;
5275     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5276   %}
5277   ins_pipe( pipe_slow );
5278 %}
5279 
5280 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
5281   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5282   match(Set dst (RShiftVS src shift));
5283   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
5284   ins_encode %{
5285     bool vector256 = true;
5286     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);






















5287   %}
5288   ins_pipe( pipe_slow );
5289 %}
5290 
5291 // Integers vector arithmetic right shift
5292 instruct vsra2I(vecD dst, vecS shift) %{
5293   predicate(n->as_Vector()->length() == 2);
5294   match(Set dst (RShiftVI dst shift));
5295   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
5296   ins_encode %{
5297     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
5298   %}
5299   ins_pipe( pipe_slow );
5300 %}
5301 
5302 instruct vsra2I_imm(vecD dst, immI8 shift) %{
5303   predicate(n->as_Vector()->length() == 2);
5304   match(Set dst (RShiftVI dst shift));
5305   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
5306   ins_encode %{
5307     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
5308   %}
5309   ins_pipe( pipe_slow );
5310 %}
5311 
5312 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
5313   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5314   match(Set dst (RShiftVI src shift));
5315   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
5316   ins_encode %{
5317     bool vector256 = false;
5318     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5319   %}
5320   ins_pipe( pipe_slow );
5321 %}
5322 
5323 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
5324   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5325   match(Set dst (RShiftVI src shift));
5326   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
5327   ins_encode %{
5328     bool vector256 = false;
5329     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5330   %}
5331   ins_pipe( pipe_slow );
5332 %}
5333 
5334 instruct vsra4I(vecX dst, vecS shift) %{
5335   predicate(n->as_Vector()->length() == 4);
5336   match(Set dst (RShiftVI dst shift));
5337   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
5338   ins_encode %{
5339     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
5340   %}
5341   ins_pipe( pipe_slow );
5342 %}
5343 
5344 instruct vsra4I_imm(vecX dst, immI8 shift) %{
5345   predicate(n->as_Vector()->length() == 4);
5346   match(Set dst (RShiftVI dst shift));
5347   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
5348   ins_encode %{
5349     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
5350   %}
5351   ins_pipe( pipe_slow );
5352 %}
5353 
5354 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
5355   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5356   match(Set dst (RShiftVI src shift));
5357   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
5358   ins_encode %{
5359     bool vector256 = false;
5360     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5361   %}
5362   ins_pipe( pipe_slow );
5363 %}
5364 
5365 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
5366   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5367   match(Set dst (RShiftVI src shift));
5368   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
5369   ins_encode %{
5370     bool vector256 = false;
5371     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5372   %}
5373   ins_pipe( pipe_slow );
5374 %}
5375 
5376 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
5377   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5378   match(Set dst (RShiftVI src shift));
5379   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
5380   ins_encode %{
5381     bool vector256 = true;
5382     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5383   %}
5384   ins_pipe( pipe_slow );
5385 %}
5386 
5387 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
5388   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5389   match(Set dst (RShiftVI src shift));
5390   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
5391   ins_encode %{
5392     bool vector256 = true;
5393     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);






















5394   %}
5395   ins_pipe( pipe_slow );
5396 %}
5397 
5398 // There are no longs vector arithmetic right shift instructions.
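      // (x86 provides no packed 64-bit arithmetic right shift below AVX-512;
      //  vpsraq is EVEX-encoded only, which is presumably why no RShiftVL rules
      //  appear in this section.)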
5399 
5400 
5401 // --------------------------------- AND --------------------------------------
5402 
5403 instruct vand4B(vecS dst, vecS src) %{
5404   predicate(n->as_Vector()->length_in_bytes() == 4);
5405   match(Set dst (AndV dst src));
5406   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
5407   ins_encode %{
5408     __ pand($dst$$XMMRegister, $src$$XMMRegister);
5409   %}
5410   ins_pipe( pipe_slow );
5411 %}
5412 
5413 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
5414   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
5415   match(Set dst (AndV src1 src2));
5416   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
5417   ins_encode %{
5418     bool vector256 = false;
5419     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5420   %}
5421   ins_pipe( pipe_slow );
5422 %}
5423 
5424 instruct vand8B(vecD dst, vecD src) %{
5425   predicate(n->as_Vector()->length_in_bytes() == 8);
5426   match(Set dst (AndV dst src));
5427   format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
5428   ins_encode %{
5429     __ pand($dst$$XMMRegister, $src$$XMMRegister);
5430   %}
5431   ins_pipe( pipe_slow );
5432 %}
5433 
5434 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
5435   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
5436   match(Set dst (AndV src1 src2));
5437   format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
5438   ins_encode %{
5439     bool vector256 = false;
5440     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5441   %}
5442   ins_pipe( pipe_slow );
5443 %}
5444 
5445 instruct vand16B(vecX dst, vecX src) %{
5446   predicate(n->as_Vector()->length_in_bytes() == 16);
5447   match(Set dst (AndV dst src));
5448   format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
5449   ins_encode %{
5450     __ pand($dst$$XMMRegister, $src$$XMMRegister);
5451   %}
5452   ins_pipe( pipe_slow );
5453 %}
5454 
5455 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
5456   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5457   match(Set dst (AndV src1 src2));
5458   format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
5459   ins_encode %{
5460     bool vector256 = false;
5461     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5462   %}
5463   ins_pipe( pipe_slow );
5464 %}
5465 
5466 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
5467   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5468   match(Set dst (AndV src (LoadVector mem)));
5469   format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
5470   ins_encode %{
5471     bool vector256 = false;
5472     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5473   %}
5474   ins_pipe( pipe_slow );
5475 %}
5476 
5477 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
5478   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5479   match(Set dst (AndV src1 src2));
5480   format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
5481   ins_encode %{
5482     bool vector256 = true;
5483     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5484   %}
5485   ins_pipe( pipe_slow );
5486 %}
5487 
5488 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
5489   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5490   match(Set dst (AndV src (LoadVector mem)));
5491   format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
5492   ins_encode %{
5493     bool vector256 = true;
5494     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















5495   %}
5496   ins_pipe( pipe_slow );
5497 %}
5498 
5499 // --------------------------------- OR ---------------------------------------
5500 
5501 instruct vor4B(vecS dst, vecS src) %{
5502   predicate(n->as_Vector()->length_in_bytes() == 4);
5503   match(Set dst (OrV dst src));
5504   format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
5505   ins_encode %{
5506     __ por($dst$$XMMRegister, $src$$XMMRegister);
5507   %}
5508   ins_pipe( pipe_slow );
5509 %}
5510 
5511 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
5512   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
5513   match(Set dst (OrV src1 src2));
5514   format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
5515   ins_encode %{
5516     bool vector256 = false;
5517     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5518   %}
5519   ins_pipe( pipe_slow );
5520 %}
5521 
5522 instruct vor8B(vecD dst, vecD src) %{
5523   predicate(n->as_Vector()->length_in_bytes() == 8);
5524   match(Set dst (OrV dst src));
5525   format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
5526   ins_encode %{
5527     __ por($dst$$XMMRegister, $src$$XMMRegister);
5528   %}
5529   ins_pipe( pipe_slow );
5530 %}
5531 
5532 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
5533   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
5534   match(Set dst (OrV src1 src2));
5535   format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
5536   ins_encode %{
5537     bool vector256 = false;
5538     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5539   %}
5540   ins_pipe( pipe_slow );
5541 %}
5542 
5543 instruct vor16B(vecX dst, vecX src) %{
5544   predicate(n->as_Vector()->length_in_bytes() == 16);
5545   match(Set dst (OrV dst src));
5546   format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
5547   ins_encode %{
5548     __ por($dst$$XMMRegister, $src$$XMMRegister);
5549   %}
5550   ins_pipe( pipe_slow );
5551 %}
5552 
5553 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
5554   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5555   match(Set dst (OrV src1 src2));
5556   format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
5557   ins_encode %{
5558     bool vector256 = false;
5559     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5560   %}
5561   ins_pipe( pipe_slow );
5562 %}
5563 
5564 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
5565   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5566   match(Set dst (OrV src (LoadVector mem)));
5567   format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
5568   ins_encode %{
5569     bool vector256 = false;
5570     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5571   %}
5572   ins_pipe( pipe_slow );
5573 %}
5574 
5575 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
5576   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5577   match(Set dst (OrV src1 src2));
5578   format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
5579   ins_encode %{
5580     bool vector256 = true;
5581     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5582   %}
5583   ins_pipe( pipe_slow );
5584 %}
5585 
5586 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
5587   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5588   match(Set dst (OrV src (LoadVector mem)));
5589   format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
5590   ins_encode %{
5591     bool vector256 = true;
5592     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















5593   %}
5594   ins_pipe( pipe_slow );
5595 %}
5596 
5597 // --------------------------------- XOR --------------------------------------
5598 
5599 instruct vxor4B(vecS dst, vecS src) %{
5600   predicate(n->as_Vector()->length_in_bytes() == 4);
5601   match(Set dst (XorV dst src));
5602   format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
5603   ins_encode %{
5604     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5605   %}
5606   ins_pipe( pipe_slow );
5607 %}
5608 
5609 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
5610   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
5611   match(Set dst (XorV src1 src2));
5612   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
5613   ins_encode %{
5614     bool vector256 = false;
5615     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5616   %}
5617   ins_pipe( pipe_slow );
5618 %}
5619 
5620 instruct vxor8B(vecD dst, vecD src) %{
5621   predicate(n->as_Vector()->length_in_bytes() == 8);
5622   match(Set dst (XorV dst src));
5623   format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
5624   ins_encode %{
5625     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5626   %}
5627   ins_pipe( pipe_slow );
5628 %}
5629 
5630 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
5631   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
5632   match(Set dst (XorV src1 src2));
5633   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
5634   ins_encode %{
5635     bool vector256 = false;
5636     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5637   %}
5638   ins_pipe( pipe_slow );
5639 %}
5640 
5641 instruct vxor16B(vecX dst, vecX src) %{
5642   predicate(n->as_Vector()->length_in_bytes() == 16);
5643   match(Set dst (XorV dst src));
5644   format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
5645   ins_encode %{
5646     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5647   %}
5648   ins_pipe( pipe_slow );
5649 %}
5650 
5651 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
5652   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5653   match(Set dst (XorV src1 src2));
5654   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
5655   ins_encode %{
5656     bool vector256 = false;
5657     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5658   %}
5659   ins_pipe( pipe_slow );
5660 %}
5661 
5662 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
5663   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5664   match(Set dst (XorV src (LoadVector mem)));
5665   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
5666   ins_encode %{
5667     bool vector256 = false;
5668     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5669   %}
5670   ins_pipe( pipe_slow );
5671 %}
5672 
5673 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
5674   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5675   match(Set dst (XorV src1 src2));
5676   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
5677   ins_encode %{
5678     bool vector256 = true;
5679     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5680   %}
5681   ins_pipe( pipe_slow );
5682 %}
5683 
5684 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
5685   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5686   match(Set dst (XorV src (LoadVector mem)));
5687   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
5688   ins_encode %{
5689     bool vector256 = true;
5690     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);






















5691   %}
5692   ins_pipe( pipe_slow );
5693 %}
5694 


  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  63 // Word a in each register holds a Float, words ab hold a Double.
  64 // The whole registers are used in SSE4.2 version intrinsics,
  65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  66 // UseXMMForArrayCopy and UseSuperword flags).
  67 // For pre-EVEX enabled architectures:
  68 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  69 // For EVEX enabled architectures:
  70 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  71 //
  72 // Linux ABI:   No register preserved across function calls
  73 //              XMM0-XMM7 might hold parameters
  74 // Windows ABI: XMM6-XMM31 preserved across function calls
  75 //              XMM0-XMM3 might hold parameters
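     // (Each reg_def below names a single 32-bit word of a register, so the
     //  sixteen entries XMM0 .. XMM0p together cover the 512-bit zmm0, and
     //  likewise for the other registers.)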
  76 
  77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  93 
  94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
 110 
 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
 127 
 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
 144 
 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
 161 
 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
 178 
 179 #ifdef _WIN64
 180 
 181 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
 182 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 183 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 184 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 185 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 186 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 187 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 188 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 189 reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 190 reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 191 reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 192 reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 193 reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 194 reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 195 reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 196 reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 197 
 198 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
 199 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 200 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 201 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 202 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 203 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 204 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 205 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 206 reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 207 reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 208 reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 209 reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 210 reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 211 reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 212 reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 213 reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 214 
 215 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
 216 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 217 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 218 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 219 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 220 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 221 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 222 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 223 reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 224 reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 225 reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 226 reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 227 reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 228 reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 229 reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 230 reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 231 
 232 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
 233 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 234 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 235 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 236 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 237 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 238 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 239 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 240 reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 241 reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 242 reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 243 reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 244 reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 245 reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 246 reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 247 reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 248 
 249 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 250 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 251 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 252 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 253 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 254 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 255 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 256 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 257 reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 258 reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 259 reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 260 reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 261 reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 262 reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 263 reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 264 reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 265 
 266 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 267 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 268 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 269 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 270 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 271 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 272 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 273 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 274 reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 275 reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 276 reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 277 reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 278 reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 279 reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 280 reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 281 reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 282 
 283 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 284 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 285 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 286 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 287 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 288 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 289 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 290 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 291 reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 292 reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 293 reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 294 reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 295 reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 296 reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 297 reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 298 reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 299 
 300 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 301 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 302 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 303 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 304 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 305 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 306 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 307 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 308 reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 309 reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 310 reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 311 reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 312 reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 313 reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 314 reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 315 reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 316 
 317 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 318 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 319 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 320 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 321 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 322 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 323 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 324 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 325 reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 326 reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 327 reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 328 reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 329 reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 330 reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 331 reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 332 reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 333 
 334 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 335 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 336 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 337 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 338 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 339 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 340 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 341 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 342 reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 343 reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 344 reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 345 reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 346 reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 347 reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 348 reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 349 reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 350 
 351 reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
 352 reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 353 reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 354 reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 355 reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 356 reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 357 reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 358 reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 359 reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 360 reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 361 reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 362 reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 363 reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 364 reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 365 reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 366 reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 367 
 368 reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
 369 reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 370 reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 371 reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 372 reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 373 reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 374 reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 375 reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 376 reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 377 reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 378 reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 379 reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 380 reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 381 reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 382 reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 383 reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 384 
 385 reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
 386 reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 387 reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 388 reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 389 reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 390 reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 391 reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 392 reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 393 reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 394 reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 395 reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 396 reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 397 reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 398 reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 399 reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 400 reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 401 
 402 reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
 403 reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 404 reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 405 reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 406 reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 407 reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 408 reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 409 reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 410 reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 411 reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 412 reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 413 reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 414 reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 415 reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 416 reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 417 reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 418 
 419 reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
 420 reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 421 reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 422 reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 423 reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 424 reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 425 reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 426 reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 427 reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 428 reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 429 reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 430 reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 431 reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 432 reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 433 reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 434 reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 435 
 436 reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
 437 reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 438 reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 439 reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 440 reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 441 reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 442 reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 443 reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 444 reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 445 reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 446 reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 447 reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 448 reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 449 reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 450 reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 451 reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 452 
 453 reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
 454 reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 455 reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 456 reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 457 reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 458 reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 459 reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 460 reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 461 reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 462 reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 463 reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 464 reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 465 reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 466 reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 467 reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 468 reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 469 
 470 reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
 471 reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 472 reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 473 reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 474 reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 475 reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 476 reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 477 reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 478 reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 479 reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 480 reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 481 reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 482 reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 483 reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 484 reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 485 reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 486 
 487 reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
 488 reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 489 reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 490 reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 491 reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 492 reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 493 reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 494 reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 495 reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 496 reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 497 reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 498 reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 499 reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 500 reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 501 reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 502 reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 503 
 504 reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
 505 reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 506 reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 507 reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 508 reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 509 reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 510 reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 511 reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 512 reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 513 reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 514 reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 515 reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 516 reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 517 reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 518 reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 519 reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 520 
 521 reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
 522 reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 523 reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 524 reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 525 reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 526 reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 527 reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 528 reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 529 reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 530 reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 531 reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 532 reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 533 reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 534 reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 535 reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 536 reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 537 
 538 reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 539 reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 540 reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 541 reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 542 reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 543 reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 544 reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 545 reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 546 reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 547 reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 548 reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 549 reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 550 reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 551 reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 552 reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 553 
 554 reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
 555 reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
 556 reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
 557 reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
 558 reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
 559 reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
 560 reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
 561 reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
 562 reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
 563 reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
 564 reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
 565 reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
 566 reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
 567 reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
 568 reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
 569 reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));
 570 
 571 reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
 572 reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
 573 reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
 574 reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
 575 reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
 576 reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
 577 reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
 578 reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
 579 reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
 580 reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
 581 reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
 582 reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
 583 reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
 584 reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
 585 reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
 586 reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));
 587 
 588 reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
 589 reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
 590 reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
 591 reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
 592 reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
 593 reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
 594 reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
 595 reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
 596 reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
 597 reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
 598 reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
 599 reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
 600 reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
 601 reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
 602 reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
 603 reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
 604 
 605 reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
 606 reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
 607 reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
 608 reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
 609 reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
 610 reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
 611 reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
 612 reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
 613 reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
 614 reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
 615 reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
 616 reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
 617 reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
 618 reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
 619 reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
 620 reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));
 621 
 622 #else // _WIN64
 623 
 624 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
 625 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
 626 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
 627 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
 628 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
 629 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
 630 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
 631 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 632 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
 633 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
 634 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
 635 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
 636 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
 637 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
 638 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
 639 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
 640 
 641 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
 642 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
 643 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
 644 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
 645 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
 646 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
 647 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
 648 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 649 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
 650 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
 651 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
 652 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
 653 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
 654 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
 655 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
 656 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
 657 
 658 #ifdef _LP64
 659 
 660 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
 661 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
 662 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
 663 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
 664 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
 665 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
 666 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
 667 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 668 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
 669 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
 670 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
 671 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
 672 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
 673 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
 674 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
 675 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
 676 
 677 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
 678 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
 679 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
 680 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
 681 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
 682 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
 683 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
 684 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 685 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
 686 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
 687 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
 688 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
 689 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
 690 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
 691 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
 692 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
 693 
 694 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 695 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
 696 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
 697 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
 698 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
 699 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
 700 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
 701 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 702 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
 703 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
 704 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
 705 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
 706 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
 707 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
 708 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
 709 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
 710 
 711 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 712 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
 713 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
 714 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
 715 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
 716 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
 717 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
 718 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 719 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
 720 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
 721 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
 722 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
 723 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
 724 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
 725 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
 726 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
 727 
 728 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 729 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
 730 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
 731 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
 732 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
 733 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
 734 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
 735 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 736 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
 737 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
 738 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
 739 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
 740 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
 741 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
 742 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
 743 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
 744 
 745 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 746 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
 747 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
 748 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
 749 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
 750 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
 751 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
 752 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 753 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
 754 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
 755 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
 756 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
 757 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
 758 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
 759 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
 760 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
 761 
 762 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 763 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
 764 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
 765 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
 766 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
 767 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
 768 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
 769 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 770 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
 771 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
 772 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
 773 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
 774 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
 775 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
 776 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
 777 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
 778 
 779 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 780 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
 781 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
 782 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
 783 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
 784 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
 785 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
 786 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 787 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
 788 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
 789 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
 790 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
 791 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
 792 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
 793 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
 794 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
 795 
 796 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
 797 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
 798 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
 799 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
 800 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
 801 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
 802 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
 803 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
 804 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
 805 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
 806 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
 807 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
 808 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
 809 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
 810 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
 811 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
 812 
 813 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
 814 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
 815 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
 816 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
 817 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
 818 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
 819 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
 820 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
 821 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
 822 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
 823 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
 824 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
 825 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
 826 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
 827 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
 828 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
 829 
 830 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
 831 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
 832 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
 833 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
 834 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
 835 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
 836 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
 837 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
 838 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
 839 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
 840 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
 841 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
 842 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
 843 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
 844 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
 845 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
 846 
 847 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
 848 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
 849 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
 850 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
 851 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
 852 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
 853 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
 854 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
 855 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
 856 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
 857 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
 858 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
 859 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
 860 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
 861 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
 862 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
 863 
 864 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
 865 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
 866 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
 867 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
 868 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
 869 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
 870 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
 871 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
 872 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
 873 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
 874 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
 875 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
 876 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
 877 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
 878 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
 879 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
 880 
 881 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
 882 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
 883 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
 884 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
 885 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
 886 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
 887 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
 888 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
 889 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
 890 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
 891 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
 892 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
 893 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
 894 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
 895 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
 896 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
 897 
 898 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
 899 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
 900 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
 901 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
 902 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
 903 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
 904 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
 905 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
 906 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
 907 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
 908 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
 909 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
 910 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
 911 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
 912 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
 913 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
 914 
 915 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
 916 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
 917 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
 918 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
 919 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
 920 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
 921 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
 922 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
 923 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
 931 
 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
 948 
 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
 965 
 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
 982 
 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
 999 
1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
1016 
1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
1027 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
1033 
1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
1050 
1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
1067 
1068 #endif // _LP64
1069 
1070 #endif // _WIN64
1071 
1072 #ifdef _LP64
1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
1074 #else
1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
1076 #endif // _LP64
1077 
1078 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1079                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1080                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1081                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1082                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1083                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1084                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1085                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1086 #ifdef _LP64
1087                   ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1088                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1089                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1090                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1091                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1092                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1093                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1094                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1095                   ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1096                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1097                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1098                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1099                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1100                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1101                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1102                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1103                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1104                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1105                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1106                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1107                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1108                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1109                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1110                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1111 #endif
1112                       );
1113 
1114 // flags allocation class should be last.
1115 alloc_class chunk3(RFLAGS);
1116 
1117 // Singleton class for condition codes
1118 reg_class int_flags(RFLAGS);
1119 
1120 // Class for pre evex float registers
1121 reg_class float_reg_legacy(XMM0,
1122                     XMM1,
1123                     XMM2,
1124                     XMM3,
1125                     XMM4,
1126                     XMM5,
1127                     XMM6,
1128                     XMM7
1129 #ifdef _LP64
1130                    ,XMM8,
1131                     XMM9,
1132                     XMM10,
1133                     XMM11,
1134                     XMM12,
1135                     XMM13,
1136                     XMM14,
1137                     XMM15
1138 #endif
1139                     );
1140 
1141 // Class for evex float registers
1142 reg_class float_reg_evex(XMM0,
1143                     XMM1,
1144                     XMM2,
1145                     XMM3,
1146                     XMM4,
1147                     XMM5,
1148                     XMM6,
1149                     XMM7
1150 #ifdef _LP64
1151                    ,XMM8,
1152                     XMM9,
1153                     XMM10,
1154                     XMM11,
1155                     XMM12,
1156                     XMM13,
1157                     XMM14,
1158                     XMM15,
1159                     XMM16,
1160                     XMM17,
1161                     XMM18,
1162                     XMM19,
1163                     XMM20,
1164                     XMM21,
1165                     XMM22,
1166                     XMM23,
1167                     XMM24,
1168                     XMM25,
1169                     XMM26,
1170                     XMM27,
1171                     XMM28,
1172                     XMM29,
1173                     XMM30,
1174                     XMM31
1175 #endif
1176                     );
1177 
1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1179 
1180 // Class for pre evex double registers
1181 reg_class double_reg_legacy(XMM0,  XMM0b,
1182                      XMM1,  XMM1b,
1183                      XMM2,  XMM2b,
1184                      XMM3,  XMM3b,
1185                      XMM4,  XMM4b,
1186                      XMM5,  XMM5b,
1187                      XMM6,  XMM6b,
1188                      XMM7,  XMM7b
1189 #ifdef _LP64
1190                     ,XMM8,  XMM8b,
1191                      XMM9,  XMM9b,
1192                      XMM10, XMM10b,
1193                      XMM11, XMM11b,
1194                      XMM12, XMM12b,
1195                      XMM13, XMM13b,
1196                      XMM14, XMM14b,
1197                      XMM15, XMM15b
1198 #endif
1199                      );
1200 
1201 // Class for evex double registers
1202 reg_class double_reg_evex(XMM0,  XMM0b,
1203                      XMM1,  XMM1b,
1204                      XMM2,  XMM2b,
1205                      XMM3,  XMM3b,
1206                      XMM4,  XMM4b,
1207                      XMM5,  XMM5b,
1208                      XMM6,  XMM6b,
1209                      XMM7,  XMM7b
1210 #ifdef _LP64
1211                     ,XMM8,  XMM8b,
1212                      XMM9,  XMM9b,
1213                      XMM10, XMM10b,
1214                      XMM11, XMM11b,
1215                      XMM12, XMM12b,
1216                      XMM13, XMM13b,
1217                      XMM14, XMM14b,
1218                      XMM15, XMM15b,
1219                      XMM16, XMM16b,
1220                      XMM17, XMM17b,
1221                      XMM18, XMM18b,
1222                      XMM19, XMM19b,
1223                      XMM20, XMM20b,
1224                      XMM21, XMM21b,
1225                      XMM22, XMM22b,
1226                      XMM23, XMM23b,
1227                      XMM24, XMM24b,
1228                      XMM25, XMM25b,
1229                      XMM26, XMM26b,
1230                      XMM27, XMM27b,
1231                      XMM28, XMM28b,
1232                      XMM29, XMM29b,
1233                      XMM30, XMM30b,
1234                      XMM31, XMM31b
1235 #endif
1236                      );
1237 
1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1239 
1240 // Class for pre evex 32bit vector registers
1241 reg_class vectors_reg_legacy(XMM0,
1242                       XMM1,
1243                       XMM2,
1244                       XMM3,
1245                       XMM4,
1246                       XMM5,
1247                       XMM6,
1248                       XMM7
1249 #ifdef _LP64
1250                      ,XMM8,
1251                       XMM9,
1252                       XMM10,
1253                       XMM11,
1254                       XMM12,
1255                       XMM13,
1256                       XMM14,
1257                       XMM15
1258 #endif
1259                       );
1260 
1261 // Class for evex 32bit vector registers
1262 reg_class vectors_reg_evex(XMM0,
1263                       XMM1,
1264                       XMM2,
1265                       XMM3,
1266                       XMM4,
1267                       XMM5,
1268                       XMM6,
1269                       XMM7
1270 #ifdef _LP64
1271                      ,XMM8,
1272                       XMM9,
1273                       XMM10,
1274                       XMM11,
1275                       XMM12,
1276                       XMM13,
1277                       XMM14,
1278                       XMM15,
1279                       XMM16,
1280                       XMM17,
1281                       XMM18,
1282                       XMM19,
1283                       XMM20,
1284                       XMM21,
1285                       XMM22,
1286                       XMM23,
1287                       XMM24,
1288                       XMM25,
1289                       XMM26,
1290                       XMM27,
1291                       XMM28,
1292                       XMM29,
1293                       XMM30,
1294                       XMM31
1295 #endif
1296                       );
1297 
1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1299 
1300 // Class for pre evex 64bit vector registers
1301 reg_class vectord_reg_legacy(XMM0,  XMM0b,
1302                       XMM1,  XMM1b,
1303                       XMM2,  XMM2b,
1304                       XMM3,  XMM3b,
1305                       XMM4,  XMM4b,
1306                       XMM5,  XMM5b,
1307                       XMM6,  XMM6b,
1308                       XMM7,  XMM7b
1309 #ifdef _LP64
1310                      ,XMM8,  XMM8b,
1311                       XMM9,  XMM9b,
1312                       XMM10, XMM10b,
1313                       XMM11, XMM11b,
1314                       XMM12, XMM12b,
1315                       XMM13, XMM13b,
1316                       XMM14, XMM14b,
1317                       XMM15, XMM15b
1318 #endif
1319                       );
1320 
1321 // Class for evex 64bit vector registers
1322 reg_class vectord_reg_evex(XMM0,  XMM0b,
1323                       XMM1,  XMM1b,
1324                       XMM2,  XMM2b,
1325                       XMM3,  XMM3b,
1326                       XMM4,  XMM4b,
1327                       XMM5,  XMM5b,
1328                       XMM6,  XMM6b,
1329                       XMM7,  XMM7b
1330 #ifdef _LP64
1331                      ,XMM8,  XMM8b,
1332                       XMM9,  XMM9b,
1333                       XMM10, XMM10b,
1334                       XMM11, XMM11b,
1335                       XMM12, XMM12b,
1336                       XMM13, XMM13b,
1337                       XMM14, XMM14b,
1338                       XMM15, XMM15b,
1339                       XMM16, XMM16b,
1340                       XMM17, XMM17b,
1341                       XMM18, XMM18b,
1342                       XMM19, XMM19b,
1343                       XMM20, XMM20b,
1344                       XMM21, XMM21b,
1345                       XMM22, XMM22b,
1346                       XMM23, XMM23b,
1347                       XMM24, XMM24b,
1348                       XMM25, XMM25b,
1349                       XMM26, XMM26b,
1350                       XMM27, XMM27b,
1351                       XMM28, XMM28b,
1352                       XMM29, XMM29b,
1353                       XMM30, XMM30b,
1354                       XMM31, XMM31b
1355 #endif
1356                       );
1357 
1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1359 
1360 // Class for pre evex 128bit vector registers
1361 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
1362                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1363                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1364                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1365                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1366                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1367                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1368                       XMM7,  XMM7b,  XMM7c,  XMM7d
1369 #ifdef _LP64
1370                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1371                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1372                       XMM10, XMM10b, XMM10c, XMM10d,
1373                       XMM11, XMM11b, XMM11c, XMM11d,
1374                       XMM12, XMM12b, XMM12c, XMM12d,
1375                       XMM13, XMM13b, XMM13c, XMM13d,
1376                       XMM14, XMM14b, XMM14c, XMM14d,
1377                       XMM15, XMM15b, XMM15c, XMM15d
1378 #endif
1379                       );
1380 
1381 // Class for evex 128bit vector registers
1382 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
1383                       XMM1,  XMM1b,  XMM1c,  XMM1d,
1384                       XMM2,  XMM2b,  XMM2c,  XMM2d,
1385                       XMM3,  XMM3b,  XMM3c,  XMM3d,
1386                       XMM4,  XMM4b,  XMM4c,  XMM4d,
1387                       XMM5,  XMM5b,  XMM5c,  XMM5d,
1388                       XMM6,  XMM6b,  XMM6c,  XMM6d,
1389                       XMM7,  XMM7b,  XMM7c,  XMM7d
1390 #ifdef _LP64
1391                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,
1392                       XMM9,  XMM9b,  XMM9c,  XMM9d,
1393                       XMM10, XMM10b, XMM10c, XMM10d,
1394                       XMM11, XMM11b, XMM11c, XMM11d,
1395                       XMM12, XMM12b, XMM12c, XMM12d,
1396                       XMM13, XMM13b, XMM13c, XMM13d,
1397                       XMM14, XMM14b, XMM14c, XMM14d,
1398                       XMM15, XMM15b, XMM15c, XMM15d,
1399                       XMM16, XMM16b, XMM16c, XMM16d,
1400                       XMM17, XMM17b, XMM17c, XMM17d,
1401                       XMM18, XMM18b, XMM18c, XMM18d,
1402                       XMM19, XMM19b, XMM19c, XMM19d,
1403                       XMM20, XMM20b, XMM20c, XMM20d,
1404                       XMM21, XMM21b, XMM21c, XMM21d,
1405                       XMM22, XMM22b, XMM22c, XMM22d,
1406                       XMM23, XMM23b, XMM23c, XMM23d,
1407                       XMM24, XMM24b, XMM24c, XMM24d,
1408                       XMM25, XMM25b, XMM25c, XMM25d,
1409                       XMM26, XMM26b, XMM26c, XMM26d,
1410                       XMM27, XMM27b, XMM27c, XMM27d,
1411                       XMM28, XMM28b, XMM28c, XMM28d,
1412                       XMM29, XMM29b, XMM29c, XMM29d,
1413                       XMM30, XMM30b, XMM30c, XMM30d,
1414                       XMM31, XMM31b, XMM31c, XMM31d
1415 #endif
1416                       );
1417 
1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1419 
1420 // Class for pre evex 256bit vector registers
1421 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1422                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1423                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1424                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1425                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1426                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1427                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1428                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1429 #ifdef _LP64
1430                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1431                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1432                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1433                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1434                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1435                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1436                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1437                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
1438 #endif
1439                       );
1440 
1441 // Class for evex 256bit vector registers
1442 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
1443                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
1444                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
1445                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
1446                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
1447                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
1448                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
1449                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
1450 #ifdef _LP64
1451                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
1452                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
1453                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1454                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1455                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1456                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1457                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1458                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1459                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1460                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1461                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1462                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1463                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1464                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1465                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1466                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1467                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1468                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1469                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1470                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1471                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1472                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1473                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1474                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
1475 #endif
1476                       );
1477 
1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1479 
1480 // Class for all 512bit vector registers
1481 reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
1482                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
1483                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
1484                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
1485                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
1486                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
1487                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
1488                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
1489 #ifdef _LP64
1490                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
1491                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
1492                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1493                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1494                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1495                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1496                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1497                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
1498                      ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1499                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1500                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1501                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1502                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1503                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1504                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1505                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1506                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1507                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1508                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1509                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1510                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1511                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1512                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1513                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
1514 #endif
1515                       );
1516 
1517 %}
1518 
1519 
1520 //----------SOURCE BLOCK-------------------------------------------------------
1521 // This is a block of C++ code which provides values, functions, and
1522 // definitions necessary in the rest of the architecture description
1523 
1524 source_hpp %{
1525 // Header information of the source block.
1526 // Method declarations/definitions which are used outside
1527 // the ad-scope can conveniently be defined here.
1528 //
1529 // To keep related declarations/definitions/uses close together,
1530 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
1531 
1532 class NativeJump;
1533 
1534 class CallStubImpl {
1535 
1536   //--------------------------------------------------------------


1648   static address float_signflip()  { return (address)float_signflip_pool; }
1649   static address double_signmask() { return (address)double_signmask_pool; }
1650   static address double_signflip() { return (address)double_signflip_pool; }
1651 #endif
1652 
1653 
1654 const bool Matcher::match_rule_supported(int opcode) {
1655   if (!has_match_rule(opcode))
1656     return false;
1657 
1658   switch (opcode) {
1659     case Op_PopCountI:
1660     case Op_PopCountL:
1661       if (!UsePopCountInstruction)
1662         return false;
1663     break;
1664     case Op_MulVI:
1665       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1666         return false;
1667     break;
1668     case Op_MulVL:
1669     case Op_MulReductionVL:
1670       if (!VM_Version::supports_avx512dq())
1671         return false;
      // Note: there is no break here -- control falls through, so the weaker
      // AVX/SSE checks below are applied to these opcodes as well.
1672     case Op_AddReductionVL:
1673       if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
1674         return false;
1675     case Op_AddReductionVI:
1676       if (UseSSE < 3) // requires at least SSE3
1677         return false;
1678     case Op_MulReductionVI:
1679       if (UseSSE < 4) // requires at least SSE4
1680         return false;
1681     case Op_AddReductionVF:
1682     case Op_AddReductionVD:
1683     case Op_MulReductionVF:
1684     case Op_MulReductionVD:
1685       if (UseSSE < 1) // requires at least SSE
1686         return false;
1687     break;
1688     case Op_CompareAndSwapL:
1689 #ifdef _LP64
1690     case Op_CompareAndSwapP:
1691 #endif
1692       if (!VM_Version::supports_cx8())
1693         return false;
1694     break;
1695   }
1696 
1697   return true;  // Per default match rules are supported.
1698 }
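
// A concrete illustration (assumed flag settings, not asserted by this file):
// on a CPU running with UseSSE == 2 and UseAVX == 0, Op_MulVI is reported as
// unsupported above, so SuperWord will not vectorize int multiplications and
// the loop stays scalar; likewise -XX:-UsePopCountInstruction disables the
// PopCountI/PopCountL rules even though POPCNT may exist in hardware.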
1699 
1700 // Max vector size in bytes. 0 if not supported.
1701 const int Matcher::vector_width_in_bytes(BasicType bt) {
1702   assert(is_java_primitive(bt), "only primitive type vectors");
1703   if (UseSSE < 2) return 0;
1704   // SSE2 supports 128bit vectors for all types.
1705   // AVX2 supports 256bit vectors for all types.
1706   // EVEX (AVX-512) supports 512bit vectors for all types.
1707   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
1708   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1709   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1710     size = (UseAVX > 2) ? 64 : 32;
1711   // Use flag to limit vector size.
1712   size = MIN2(size,(int)MaxVectorSize);
1713   // Minimum 2 values in vector (or 4 for bytes).
1714   switch (bt) {
1715   case T_DOUBLE:
1716   case T_LONG:
1717     if (size < 16) return 0;
1718   case T_FLOAT:
1719   case T_INT:
1720     if (size < 8) return 0;
1721   case T_BOOLEAN:
1722   case T_BYTE:
1723   case T_CHAR:
1724   case T_SHORT:
1725     if (size < 4) return 0;
1726     break;
1727   default:
1728     ShouldNotReachHere();
1729   }
1730   return size;


1732 
1733 // Limits on vector size (number of elements) loaded into vector.
1734 const int Matcher::max_vector_size(const BasicType bt) {
1735   return vector_width_in_bytes(bt)/type2aelembytes(bt);
1736 }
1737 const int Matcher::min_vector_size(const BasicType bt) {
1738   int max_size = max_vector_size(bt);
1739   // Min size which can be loaded into vector is 4 bytes.
1740   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
1741   return MIN2(size,max_size);
1742 }
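
// Worked example of the sizing above (illustrative values only, assuming
// UseAVX == 2 and MaxVectorSize == 32):
//   vector_width_in_bytes(T_INT)  = (1 << 2) * 8 = 32 bytes
//   max_vector_size(T_INT)        = 32 / 4       = 8 elements
//   min_vector_size(T_INT)        = MIN2(2, 8)   = 2 elements
//   min_vector_size(T_BYTE)       = MIN2(4, 32)  = 4 elements
// i.e. the smallest vector actually loaded is always at least 4 bytes.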
1743 
1744 // Vector ideal reg corresponding to specified size in bytes
1745 const int Matcher::vector_ideal_reg(int size) {
1746   assert(MaxVectorSize >= size, "");
1747   switch(size) {
1748     case  4: return Op_VecS;
1749     case  8: return Op_VecD;
1750     case 16: return Op_VecX;
1751     case 32: return Op_VecY;
1752     case 64: return Op_VecZ;
1753   }
1754   ShouldNotReachHere();
1755   return 0;
1756 }
1757 
1758 // Only lowest bits of xmm reg are used for vector shift count.
1759 const int Matcher::vector_shift_count_ideal_reg(int size) {
1760   return Op_VecS;
1761 }
1762 
1763 // x86 supports misaligned vectors store/load.
1764 const bool Matcher::misaligned_vectors_ok() {
1765   return !AlignVector; // can be changed by flag
1766 }
1767 
1768 // x86 AES instructions are compatible with SunJCE expanded
1769 // keys, hence we do not need to pass the original key to stubs
1770 const bool Matcher::pass_original_key_for_aes() {
1771   return false;
1772 }


1776                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
1777   // In the 64-bit VM size calculation is very complex, so the size is
1778   // obtained by emitting the instructions into a scratch buffer.
1779   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1780   assert(ireg == Op_VecS || // 32bit vector
1781          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1782          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1783          "no non-adjacent vector moves" );
1784   if (cbuf) {
1785     MacroAssembler _masm(cbuf);
1786     int offset = __ offset();
1787     switch (ireg) {
1788     case Op_VecS: // copy whole register
1789     case Op_VecD:
1790     case Op_VecX:
1791       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1792       break;
1793     case Op_VecY:
1794       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1795       break;
1796     case Op_VecZ:
1797       __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1798       break;
1799     default:
1800       ShouldNotReachHere();
1801     }
1802     int size = __ offset() - offset;
1803 #ifdef ASSERT
1804     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1805     assert(!do_size || size == 4, "incorrect size calculation");
1806 #endif
1807     return size;
1808 #ifndef PRODUCT
1809   } else if (!do_size) {
1810     switch (ireg) {
1811     case Op_VecS:
1812     case Op_VecD:
1813     case Op_VecX:
1814       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1815       break;
1816     case Op_VecY:
1817     case Op_VecZ:
1818       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1819       break;
1820     default:
1821       ShouldNotReachHere();
1822     }
1823 #endif
1824   }
1825   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
1826   return (UseAVX > 2) ? 6 : 4;
1827 }
1828 
1829 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1830                             int stack_offset, int reg, uint ireg, outputStream* st) {
1831   // In the 64-bit VM size calculation is very complex, so the size is
1832   // obtained by emitting the instructions into a scratch buffer.
1833   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1834   if (cbuf) {
1835     MacroAssembler _masm(cbuf);
1836     int offset = __ offset();
1837     if (is_load) {
1838       switch (ireg) {
1839       case Op_VecS:
1840         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1841         break;
1842       case Op_VecD:
1843         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1844         break;
1845       case Op_VecX:
1846         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1847         break;
1848       case Op_VecY:
1849         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1850         break;
1851       case Op_VecZ:
1852         __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1853         break;
1854       default:
1855         ShouldNotReachHere();
1856       }
1857     } else { // store
1858       switch (ireg) {
1859       case Op_VecS:
1860         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1861         break;
1862       case Op_VecD:
1863         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1864         break;
1865       case Op_VecX:
1866         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1867         break;
1868       case Op_VecY:
1869         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1870         break;
1871       case Op_VecZ:
1872         __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1873         break;
1874       default:
1875         ShouldNotReachHere();
1876       }
1877     }
1878     int size = __ offset() - offset;
1879 #ifdef ASSERT
1880     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1881     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1882     assert(!do_size || size == (5+offset_size), "incorrect size calculation");
1883 #endif
1884     return size;
1885 #ifndef PRODUCT
1886   } else if (!do_size) {
1887     if (is_load) {
1888       switch (ireg) {
1889       case Op_VecS:
1890         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1891         break;
1892       case Op_VecD:
1893         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1894         break;
1895        case Op_VecX:
1896         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1897         break;
1898       case Op_VecY:
1899       case Op_VecZ:
1900         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1901         break;
1902       default:
1903         ShouldNotReachHere();
1904       }
1905     } else { // store
1906       switch (ireg) {
1907       case Op_VecS:
1908         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1909         break;
1910       case Op_VecD:
1911         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1912         break;
1913        case Op_VecX:
1914         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1915         break;
1916       case Op_VecY:
1917       case Op_VecZ:
1918         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1919         break;
1920       default:
1921         ShouldNotReachHere();
1922       }
1923     }
1924 #endif
1925   }
1926   int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1927   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1928   return 5+offset_size;
1929 }
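
// Size sketch for the 32-bit case (assumed example): a 16-byte reload such as
// "movdqu xmm1,[rsp+0x10]" encodes as F3 0F 6F /r with a SIB byte (5 bytes)
// plus a 1-byte disp8, giving 5 + offset_size = 6 bytes as checked by the
// assert above.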
1930 
1931 static inline jfloat replicate4_imm(int con, int width) {
1932   // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
1933   assert(width == 1 || width == 2, "only byte or short types here");
1934   int bit_width = width * 8;
1935   jint val = con;
1936   val &= (1 << bit_width) - 1;  // mask off sign bits
1937   while(bit_width < 32) {
1938     val |= (val << bit_width);
1939     bit_width <<= 1;
1940   }
1941   jfloat fval = *((jfloat*) &val);  // coerce to float type
1942   return fval;
1943 }
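
// Illustrative trace of the widening above (assumed constant 0x1F, width == 1):
//   val  = 0x1F & 0xFF   -> 0x0000001F
//   val |= val << 8      -> 0x00001F1F
//   val |= val << 16     -> 0x1F1F1F1F
// The 32-bit pattern is then reinterpreted as a jfloat so that a single movdl
// from the constant table fills every byte lane.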
1944 
1945 static inline jdouble replicate8_imm(int con, int width) {
1946   // Load a constant of "width" (in bytes) and replicate it to fill 64bit.


2010       // Check that stack depth is unchanged: find majik cookie on stack
2011       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2012       MacroAssembler _masm(&cbuf);
2013       Label L;
2014       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2015       __ jccb(Assembler::equal, L);
2016       // Die if stack mismatch
2017       __ int3();
2018       __ bind(L);
2019     }
2020   %}
2021 
2022 %}
2023 
2024 
2025 //----------OPERANDS-----------------------------------------------------------
2026 // Operand definitions must precede instruction definitions for correct parsing
2027 // in the ADLC because operands constitute user defined types which are used in
2028 // instruction definitions.
2029 
2030 // This operand applies only to EVEX targets, so only one version is needed
2031 operand vecZ() %{
2032   constraint(ALLOC_IN_RC(vectorz_reg));
2033   match(VecZ);
2034 
2035   format %{ %}
2036   interface(REG_INTER);
2037 %}
2038 

2039 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2040 
2041 // ============================================================================
2042 
2043 instruct ShouldNotReachHere() %{
2044   match(Halt);
2045   format %{ "int3\t# ShouldNotReachHere" %}
2046   ins_encode %{
2047     __ int3();
2048   %}
2049   ins_pipe(pipe_slow);
2050 %}
2051 
2052 // ============================================================================
2053 
2054 instruct addF_reg(regF dst, regF src) %{
2055   predicate((UseSSE>=1) && (UseAVX == 0));
2056   match(Set dst (AddF dst src));
2057 
2058   format %{ "addss   $dst, $src" %}


2619   ins_pipe(pipe_slow);
2620 %}
2621 
2622 instruct absF_reg(regF dst) %{
2623   predicate((UseSSE>=1) && (UseAVX == 0));
2624   match(Set dst (AbsF dst));
2625   ins_cost(150);
2626   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
2627   ins_encode %{
2628     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
2629   %}
2630   ins_pipe(pipe_slow);
2631 %}
2632 
2633 instruct absF_reg_reg(regF dst, regF src) %{
2634   predicate(UseAVX > 0);
2635   match(Set dst (AbsF src));
2636   ins_cost(150);
2637   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2638   ins_encode %{
2639     int vector_len = 0;
2640     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2641               ExternalAddress(float_signmask()), vector_len);
2642   %}
2643   ins_pipe(pipe_slow);
2644 %}
2645 
2646 instruct absD_reg(regD dst) %{
2647   predicate((UseSSE>=2) && (UseAVX == 0));
2648   match(Set dst (AbsD dst));
2649   ins_cost(150);
2650   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
2651             "# abs double by sign masking" %}
2652   ins_encode %{
2653     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
2654   %}
2655   ins_pipe(pipe_slow);
2656 %}
2657 
2658 instruct absD_reg_reg(regD dst, regD src) %{
2659   predicate(UseAVX > 0);
2660   match(Set dst (AbsD src));
2661   ins_cost(150);
2662   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
2663             "# abs double by sign masking" %}
2664   ins_encode %{
2665     int vector_len = 0;
2666     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2667               ExternalAddress(double_signmask()), vector_len);
2668   %}
2669   ins_pipe(pipe_slow);
2670 %}
2671 
2672 instruct negF_reg(regF dst) %{
2673   predicate((UseSSE>=1) && (UseAVX == 0));
2674   match(Set dst (NegF dst));
2675   ins_cost(150);
2676   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
2677   ins_encode %{
2678     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
2679   %}
2680   ins_pipe(pipe_slow);
2681 %}
2682 
2683 instruct negF_reg_reg(regF dst, regF src) %{
2684   predicate(UseAVX > 0);
2685   match(Set dst (NegF src));
2686   ins_cost(150);
2687   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
2688   ins_encode %{
2689     int vector_len = 0;
2690     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
2691               ExternalAddress(float_signflip()), vector_len);
2692   %}
2693   ins_pipe(pipe_slow);
2694 %}
2695 
2696 instruct negD_reg(regD dst) %{
2697   predicate((UseSSE>=2) && (UseAVX == 0));
2698   match(Set dst (NegD dst));
2699   ins_cost(150);
2700   format %{ "xorpd   $dst, [0x8000000000000000]\t"
2701             "# neg double by sign flipping" %}
2702   ins_encode %{
2703     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
2704   %}
2705   ins_pipe(pipe_slow);
2706 %}
2707 
2708 instruct negD_reg_reg(regD dst, regD src) %{
2709   predicate(UseAVX > 0);
2710   match(Set dst (NegD src));
2711   ins_cost(150);
2712   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
2713             "# neg double by sign flipping" %}
2714   ins_encode %{
2715     int vector_len = 0;
2716     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
2717               ExternalAddress(double_signflip()), vector_len);
2718   %}
2719   ins_pipe(pipe_slow);
2720 %}
2721 
2722 instruct sqrtF_reg(regF dst, regF src) %{
2723   predicate(UseSSE>=1);
2724   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
2725 
2726   format %{ "sqrtss  $dst, $src" %}
2727   ins_cost(150);
2728   ins_encode %{
2729     __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
2730   %}
2731   ins_pipe(pipe_slow);
2732 %}
2733 
2734 instruct sqrtF_mem(regF dst, memory src) %{
2735   predicate(UseSSE>=1);
2736   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
2737 


2772 
2773   format %{ "sqrtsd  $dst, $src" %}
2774   ins_cost(150);
2775   ins_encode %{
2776     __ sqrtsd($dst$$XMMRegister, $src$$Address);
2777   %}
2778   ins_pipe(pipe_slow);
2779 %}
2780 
2781 instruct sqrtD_imm(regD dst, immD con) %{
2782   predicate(UseSSE>=2);
2783   match(Set dst (SqrtD con));
2784   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2785   ins_cost(150);
2786   ins_encode %{
2787     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
2788   %}
2789   ins_pipe(pipe_slow);
2790 %}
2791 

2792 // ====================VECTOR INSTRUCTIONS=====================================
2793 
2794 // Load vectors (4 bytes long)
2795 instruct loadV4(vecS dst, memory mem) %{
2796   predicate(n->as_LoadVector()->memory_size() == 4);
2797   match(Set dst (LoadVector mem));
2798   ins_cost(125);
2799   format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
2800   ins_encode %{
2801     __ movdl($dst$$XMMRegister, $mem$$Address);
2802   %}
2803   ins_pipe( pipe_slow );
2804 %}
2805 
2806 // Load vectors (8 bytes long)
2807 instruct loadV8(vecD dst, memory mem) %{
2808   predicate(n->as_LoadVector()->memory_size() == 8);
2809   match(Set dst (LoadVector mem));
2810   ins_cost(125);
2811   format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}


2822   ins_cost(125);
2823   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
2824   ins_encode %{
2825     __ movdqu($dst$$XMMRegister, $mem$$Address);
2826   %}
2827   ins_pipe( pipe_slow );
2828 %}
2829 
2830 // Load vectors (32 bytes long)
2831 instruct loadV32(vecY dst, memory mem) %{
2832   predicate(n->as_LoadVector()->memory_size() == 32);
2833   match(Set dst (LoadVector mem));
2834   ins_cost(125);
2835   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
2836   ins_encode %{
2837     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
2838   %}
2839   ins_pipe( pipe_slow );
2840 %}
2841 
2842 // Load vectors (64 bytes long)
2843 instruct loadV64(vecZ dst, memory mem) %{
2844   predicate(n->as_LoadVector()->memory_size() == 64);
2845   match(Set dst (LoadVector mem));
2846   ins_cost(125);
2847   format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
2848   ins_encode %{
2849     int vector_len = 2;
2850     __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
2851   %}
2852   ins_pipe( pipe_slow );
2853 %}
2854 
2855 // Store vectors
2856 instruct storeV4(memory mem, vecS src) %{
2857   predicate(n->as_StoreVector()->memory_size() == 4);
2858   match(Set mem (StoreVector mem src));
2859   ins_cost(145);
2860   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
2861   ins_encode %{
2862     __ movdl($mem$$Address, $src$$XMMRegister);
2863   %}
2864   ins_pipe( pipe_slow );
2865 %}
2866 
2867 instruct storeV8(memory mem, vecD src) %{
2868   predicate(n->as_StoreVector()->memory_size() == 8);
2869   match(Set mem (StoreVector mem src));
2870   ins_cost(145);
2871   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
2872   ins_encode %{
2873     __ movq($mem$$Address, $src$$XMMRegister);
2874   %}


2880   match(Set mem (StoreVector mem src));
2881   ins_cost(145);
2882   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
2883   ins_encode %{
2884     __ movdqu($mem$$Address, $src$$XMMRegister);
2885   %}
2886   ins_pipe( pipe_slow );
2887 %}
2888 
2889 instruct storeV32(memory mem, vecY src) %{
2890   predicate(n->as_StoreVector()->memory_size() == 32);
2891   match(Set mem (StoreVector mem src));
2892   ins_cost(145);
2893   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
2894   ins_encode %{
2895     __ vmovdqu($mem$$Address, $src$$XMMRegister);
2896   %}
2897   ins_pipe( pipe_slow );
2898 %}
2899 
2900 instruct storeV64(memory mem, vecZ src) %{
2901   predicate(n->as_StoreVector()->memory_size() == 64);
2902   match(Set mem (StoreVector mem src));
2903   ins_cost(145);
2904   format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
2905   ins_encode %{
2906     int vector_len = 2;
2907     __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
2908   %}
2909   ins_pipe( pipe_slow );
2910 %}
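
// In the EVEX load/store forms above, the vector_len argument of 2 selects the
// 512-bit (zmm) encoding -- the same constant the spill helpers earlier in this
// file pass to evmovdqu for Op_VecZ.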
2911 
2912 // Replicate byte scalar to be vector
2913 instruct Repl4B(vecS dst, rRegI src) %{
2914   predicate(n->as_Vector()->length() == 4);
2915   match(Set dst (ReplicateB src));
2916   format %{ "movd    $dst,$src\n\t"
2917             "punpcklbw $dst,$dst\n\t"
2918             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
2919   ins_encode %{
2920     __ movdl($dst$$XMMRegister, $src$$Register);
2921     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2922     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2923   %}
2924   ins_pipe( pipe_slow );
2925 %}
2926 
2927 instruct Repl8B(vecD dst, rRegI src) %{
2928   predicate(n->as_Vector()->length() == 8);
2929   match(Set dst (ReplicateB src));
2930   format %{ "movd    $dst,$src\n\t"
2931             "punpcklbw $dst,$dst\n\t"


2955 %}
2956 
2957 instruct Repl32B(vecY dst, rRegI src) %{
2958   predicate(n->as_Vector()->length() == 32);
2959   match(Set dst (ReplicateB src));
2960   format %{ "movd    $dst,$src\n\t"
2961             "punpcklbw $dst,$dst\n\t"
2962             "pshuflw $dst,$dst,0x00\n\t"
2963             "punpcklqdq $dst,$dst\n\t"
2964             "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
2965   ins_encode %{
2966     __ movdl($dst$$XMMRegister, $src$$Register);
2967     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2968     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2969     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2970     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2971   %}
2972   ins_pipe( pipe_slow );
2973 %}
2974 
2975 instruct Repl64B(vecZ dst, rRegI src) %{
2976   predicate(n->as_Vector()->length() == 64);
2977   match(Set dst (ReplicateB src));
2978   format %{ "movd    $dst,$src\n\t"
2979             "punpcklbw $dst,$dst\n\t"
2980             "pshuflw $dst,$dst,0x00\n\t"
2981             "punpcklqdq $dst,$dst\n\t"
2982             "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t"
2983             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B" %}
2984   ins_encode %{
2985     __ movdl($dst$$XMMRegister, $src$$Register);
2986     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
2987     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2988     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2989     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2990     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2991   %}
2992   ins_pipe( pipe_slow );
2993 %}
2994 
2995 // Replicate byte scalar immediate to be vector by loading from const table.
2996 instruct Repl4B_imm(vecS dst, immI con) %{
2997   predicate(n->as_Vector()->length() == 4);
2998   match(Set dst (ReplicateB con));
2999   format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
3000   ins_encode %{
3001     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
3002   %}
3003   ins_pipe( pipe_slow );
3004 %}
3005 
3006 instruct Repl8B_imm(vecD dst, immI con) %{
3007   predicate(n->as_Vector()->length() == 8);
3008   match(Set dst (ReplicateB con));
3009   format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
3010   ins_encode %{
3011     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3012   %}
3013   ins_pipe( pipe_slow );
3014 %}


3022     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3023     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3024   %}
3025   ins_pipe( pipe_slow );
3026 %}
3027 
3028 instruct Repl32B_imm(vecY dst, immI con) %{
3029   predicate(n->as_Vector()->length() == 32);
3030   match(Set dst (ReplicateB con));
3031   format %{ "movq    $dst,[$constantaddress]\n\t"
3032             "punpcklqdq $dst,$dst\n\t"
3033             "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
3034   ins_encode %{
3035     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3036     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3037     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3038   %}
3039   ins_pipe( pipe_slow );
3040 %}
3041 
3042 instruct Repl64B_imm(vecZ dst, immI con) %{
3043   predicate(n->as_Vector()->length() == 64);
3044   match(Set dst (ReplicateB con));
3045   format %{ "movq    $dst,[$constantaddress]\n\t"
3046             "punpcklqdq $dst,$dst\n\t"
3047             "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t"
3048             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %}
3049   ins_encode %{
3050     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3051     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3052     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3053     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3054   %}
3055   ins_pipe( pipe_slow );
3056 %}
3057 
3058 // Replicate byte scalar zero to be vector
3059 instruct Repl4B_zero(vecS dst, immI0 zero) %{
3060   predicate(n->as_Vector()->length() == 4);
3061   match(Set dst (ReplicateB zero));
3062   format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
3063   ins_encode %{
3064     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3065   %}
3066   ins_pipe( fpu_reg_reg );
3067 %}
3068 
3069 instruct Repl8B_zero(vecD dst, immI0 zero) %{
3070   predicate(n->as_Vector()->length() == 8);
3071   match(Set dst (ReplicateB zero));
3072   format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
3073   ins_encode %{
3074     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3075   %}
3076   ins_pipe( fpu_reg_reg );
3077 %}
3078 
3079 instruct Repl16B_zero(vecX dst, immI0 zero) %{
3080   predicate(n->as_Vector()->length() == 16);
3081   match(Set dst (ReplicateB zero));
3082   format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
3083   ins_encode %{
3084     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3085   %}
3086   ins_pipe( fpu_reg_reg );
3087 %}
3088 
3089 instruct Repl32B_zero(vecY dst, immI0 zero) %{
3090   predicate(n->as_Vector()->length() == 32);
3091   match(Set dst (ReplicateB zero));
3092   format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
3093   ins_encode %{
3094     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
3095     int vector_len = 1;
3096     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3097   %}
3098   ins_pipe( fpu_reg_reg );
3099 %}
3100 
3101 instruct Repl64B_zero(vecZ dst, immI0 zero) %{
3102   predicate(n->as_Vector()->length() == 64);
3103   match(Set dst (ReplicateB zero));
3104   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate64B zero" %}
3105   ins_encode %{
3106     // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3107     int vector_len = 2;
3108     __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3109   %}
3110   ins_pipe( fpu_reg_reg );
3111 %}
3112 
3113 // Replicate char/short (2 byte) scalar to be vector
3114 instruct Repl2S(vecS dst, rRegI src) %{
3115   predicate(n->as_Vector()->length() == 2);
3116   match(Set dst (ReplicateS src));
3117   format %{ "movd    $dst,$src\n\t"
3118             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
3119   ins_encode %{
3120     __ movdl($dst$$XMMRegister, $src$$Register);
3121     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3122   %}
3123   ins_pipe( fpu_reg_reg );
3124 %}
3125 
3126 instruct Repl4S(vecD dst, rRegI src) %{
3127   predicate(n->as_Vector()->length() == 4);
3128   match(Set dst (ReplicateS src));


3148   %}
3149   ins_pipe( pipe_slow );
3150 %}
3151 
3152 instruct Repl16S(vecY dst, rRegI src) %{
3153   predicate(n->as_Vector()->length() == 16);
3154   match(Set dst (ReplicateS src));
3155   format %{ "movd    $dst,$src\n\t"
3156             "pshuflw $dst,$dst,0x00\n\t"
3157             "punpcklqdq $dst,$dst\n\t"
3158             "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
3159   ins_encode %{
3160     __ movdl($dst$$XMMRegister, $src$$Register);
3161     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3162     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3163     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3164   %}
3165   ins_pipe( pipe_slow );
3166 %}
3167 
3168 instruct Repl32S(vecZ dst, rRegI src) %{
3169   predicate(n->as_Vector()->length() == 32);
3170   match(Set dst (ReplicateS src));
3171   format %{ "movd    $dst,$src\n\t"
3172             "pshuflw $dst,$dst,0x00\n\t"
3173             "punpcklqdq $dst,$dst\n\t"
3174             "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t"
3175             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %}
3176   ins_encode %{
3177     __ movdl($dst$$XMMRegister, $src$$Register);
3178     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3179     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3180     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3181     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3182   %}
3183   ins_pipe( pipe_slow );
3184 %}
3185 
3186 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
3187 instruct Repl2S_imm(vecS dst, immI con) %{
3188   predicate(n->as_Vector()->length() == 2);
3189   match(Set dst (ReplicateS con));
3190   format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
3191   ins_encode %{
3192     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
3193   %}
3194   ins_pipe( fpu_reg_reg );
3195 %}
3196 
3197 instruct Repl4S_imm(vecD dst, immI con) %{
3198   predicate(n->as_Vector()->length() == 4);
3199   match(Set dst (ReplicateS con));
3200   format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
3201   ins_encode %{
3202     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3203   %}
3204   ins_pipe( fpu_reg_reg );
3205 %}


3213     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3214     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3215   %}
3216   ins_pipe( pipe_slow );
3217 %}
3218 
3219 instruct Repl16S_imm(vecY dst, immI con) %{
3220   predicate(n->as_Vector()->length() == 16);
3221   match(Set dst (ReplicateS con));
3222   format %{ "movq    $dst,[$constantaddress]\n\t"
3223             "punpcklqdq $dst,$dst\n\t"
3224             "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
3225   ins_encode %{
3226     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3227     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3228     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3229   %}
3230   ins_pipe( pipe_slow );
3231 %}
3232 
3233 instruct Repl32S_imm(vecZ dst, immI con) %{
3234   predicate(n->as_Vector()->length() == 32);
3235   match(Set dst (ReplicateS con));
3236   format %{ "movq    $dst,[$constantaddress]\n\t"
3237             "punpcklqdq $dst,$dst\n\t"
3238             "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t"
3239             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %}
3240   ins_encode %{
3241     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3242     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3243     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3244     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3245   %}
3246   ins_pipe( pipe_slow );
3247 %}
3248 
3249 // Replicate char/short (2 byte) scalar zero to be vector
3250 instruct Repl2S_zero(vecS dst, immI0 zero) %{
3251   predicate(n->as_Vector()->length() == 2);
3252   match(Set dst (ReplicateS zero));
3253   format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
3254   ins_encode %{
3255     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3256   %}
3257   ins_pipe( fpu_reg_reg );
3258 %}
3259 
3260 instruct Repl4S_zero(vecD dst, immI0 zero) %{
3261   predicate(n->as_Vector()->length() == 4);
3262   match(Set dst (ReplicateS zero));
3263   format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
3264   ins_encode %{
3265     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3266   %}
3267   ins_pipe( fpu_reg_reg );
3268 %}
3269 
3270 instruct Repl8S_zero(vecX dst, immI0 zero) %{
3271   predicate(n->as_Vector()->length() == 8);
3272   match(Set dst (ReplicateS zero));
3273   format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
3274   ins_encode %{
3275     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3276   %}
3277   ins_pipe( fpu_reg_reg );
3278 %}
3279 
3280 instruct Repl16S_zero(vecY dst, immI0 zero) %{
3281   predicate(n->as_Vector()->length() == 16);
3282   match(Set dst (ReplicateS zero));
3283   format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
3284   ins_encode %{
3285     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
3286     int vector_len = 1;
3287     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3288   %}
3289   ins_pipe( fpu_reg_reg );
3290 %}
3291 
3292 instruct Repl32S_zero(vecZ dst, immI0 zero) %{
3293   predicate(n->as_Vector()->length() == 32);
3294   match(Set dst (ReplicateS zero));
3295   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate32S zero" %}
3296   ins_encode %{
3297     // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3298     int vector_len = 2;
3299     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3300   %}
3301   ins_pipe( fpu_reg_reg );
3302 %}
3303 
3304 // Replicate integer (4 byte) scalar to be vector
3305 instruct Repl2I(vecD dst, rRegI src) %{
3306   predicate(n->as_Vector()->length() == 2);
3307   match(Set dst (ReplicateI src));
3308   format %{ "movd    $dst,$src\n\t"
3309             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3310   ins_encode %{
3311     __ movdl($dst$$XMMRegister, $src$$Register);
3312     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3313   %}
3314   ins_pipe( fpu_reg_reg );
3315 %}
3316 
3317 instruct Repl4I(vecX dst, rRegI src) %{
3318   predicate(n->as_Vector()->length() == 4);
3319   match(Set dst (ReplicateI src));


3323     __ movdl($dst$$XMMRegister, $src$$Register);
3324     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3325   %}
3326   ins_pipe( pipe_slow );
3327 %}
3328 
3329 instruct Repl8I(vecY dst, rRegI src) %{
3330   predicate(n->as_Vector()->length() == 8);
3331   match(Set dst (ReplicateI src));
3332   format %{ "movd    $dst,$src\n\t"
3333             "pshufd  $dst,$dst,0x00\n\t"
3334             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3335   ins_encode %{
3336     __ movdl($dst$$XMMRegister, $src$$Register);
3337     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3338     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3339   %}
3340   ins_pipe( pipe_slow );
3341 %}
3342 
3343 instruct Repl16I(vecZ dst, rRegI src) %{
3344   predicate(n->as_Vector()->length() == 16);
3345   match(Set dst (ReplicateI src));
3346   format %{ "movd    $dst,$src\n\t"
3347             "pshufd  $dst,$dst,0x00\n\t"
3348             "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
3349             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
3350   ins_encode %{
3351     __ movdl($dst$$XMMRegister, $src$$Register);
3352     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3353     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3354     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3355   %}
3356   ins_pipe( pipe_slow );
3357 %}
3358 
3359 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
3360 instruct Repl2I_imm(vecD dst, immI con) %{
3361   predicate(n->as_Vector()->length() == 2);
3362   match(Set dst (ReplicateI con));
3363   format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
3364   ins_encode %{
3365     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3366   %}
3367   ins_pipe( fpu_reg_reg );
3368 %}
3369 
3370 instruct Repl4I_imm(vecX dst, immI con) %{
3371   predicate(n->as_Vector()->length() == 4);
3372   match(Set dst (ReplicateI con));
3373   format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
3374             "punpcklqdq $dst,$dst" %}
3375   ins_encode %{
3376     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3377     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3378   %}
3379   ins_pipe( pipe_slow );
3380 %}
3381 
3382 instruct Repl8I_imm(vecY dst, immI con) %{
3383   predicate(n->as_Vector()->length() == 8);
3384   match(Set dst (ReplicateI con));
3385   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
3386             "punpcklqdq $dst,$dst\n\t"
3387             "vinserti128h $dst,$dst,$dst" %}
3388   ins_encode %{
3389     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3390     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3391     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3392   %}
3393   ins_pipe( pipe_slow );
3394 %}
3395 
3396 instruct Repl16I_imm(vecZ dst, immI con) %{
3397   predicate(n->as_Vector()->length() == 16);
3398   match(Set dst (ReplicateI con));
3399   format %{ "movq    $dst,[$constantaddress]\t! replicate16I($con)\n\t"
3400             "punpcklqdq $dst,$dst\n\t"
3401             "vinserti128h $dst,$dst,$dst\n\t"
3402             "vinserti64x4h $dst k0,$dst,$dst" %}
3403   ins_encode %{
3404     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3405     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3406     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3407     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3408   %}
3409   ins_pipe( pipe_slow );
3410 %}
3411 
3412 // Integer could be loaded into xmm register directly from memory.
3413 instruct Repl2I_mem(vecD dst, memory mem) %{
3414   predicate(n->as_Vector()->length() == 2);
3415   match(Set dst (ReplicateI (LoadI mem)));
3416   format %{ "movd    $dst,$mem\n\t"
3417             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
3418   ins_encode %{
3419     __ movdl($dst$$XMMRegister, $mem$$Address);
3420     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3421   %}
3422   ins_pipe( fpu_reg_reg );
3423 %}
3424 
3425 instruct Repl4I_mem(vecX dst, memory mem) %{
3426   predicate(n->as_Vector()->length() == 4);
3427   match(Set dst (ReplicateI (LoadI mem)));
3428   format %{ "movd    $dst,$mem\n\t"
3429             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
3430   ins_encode %{
3431     __ movdl($dst$$XMMRegister, $mem$$Address);
3432     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3433   %}
3434   ins_pipe( pipe_slow );
3435 %}
3436 
3437 instruct Repl8I_mem(vecY dst, memory mem) %{
3438   predicate(n->as_Vector()->length() == 8);
3439   match(Set dst (ReplicateI (LoadI mem)));
3440   format %{ "movd    $dst,$mem\n\t"
3441             "pshufd  $dst,$dst,0x00\n\t"
3442             "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3443   ins_encode %{
3444     __ movdl($dst$$XMMRegister, $mem$$Address);
3445     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3446     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3447   %}
3448   ins_pipe( pipe_slow );
3449 %}
3450 
3451 instruct Repl16I_mem(vecZ dst, memory mem) %{
3452   predicate(n->as_Vector()->length() == 16);
3453   match(Set dst (ReplicateI (LoadI mem)));
3454   format %{ "movd    $dst,$mem\n\t"
3455             "pshufd  $dst,$dst,0x00\n\t"
3456             "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
3457             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
3458   ins_encode %{
3459     __ movdl($dst$$XMMRegister, $mem$$Address);
3460     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3461     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3462     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3463   %}
3464   ins_pipe( pipe_slow );
3465 %}
3466 
3467 // Replicate integer (4 byte) scalar zero to be vector
3468 instruct Repl2I_zero(vecD dst, immI0 zero) %{
3469   predicate(n->as_Vector()->length() == 2);
3470   match(Set dst (ReplicateI zero));
3471   format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
3472   ins_encode %{
3473     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3474   %}
3475   ins_pipe( fpu_reg_reg );
3476 %}
3477 
3478 instruct Repl4I_zero(vecX dst, immI0 zero) %{
3479   predicate(n->as_Vector()->length() == 4);
3480   match(Set dst (ReplicateI zero));
3481   format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
3482   ins_encode %{
3483     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3484   %}
3485   ins_pipe( fpu_reg_reg );
3486 %}
3487 
3488 instruct Repl8I_zero(vecY dst, immI0 zero) %{
3489   predicate(n->as_Vector()->length() == 8);
3490   match(Set dst (ReplicateI zero));
3491   format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
3492   ins_encode %{
3493     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
3494     int vector_len = 1;
3495     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3496   %}
3497   ins_pipe( fpu_reg_reg );
3498 %}
3499 
3500 instruct Repl16I_zero(vecZ dst, immI0 zero) %{
3501   predicate(n->as_Vector()->length() == 16);
3502   match(Set dst (ReplicateI zero));
3503   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate16I zero" %}
3504   ins_encode %{
3505     // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3506     int vector_len = 2;
3507     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3508   %}
3509   ins_pipe( fpu_reg_reg );
3510 %}
3511 
3512 // Replicate long (8 byte) scalar to be vector
3513 #ifdef _LP64
3514 instruct Repl2L(vecX dst, rRegL src) %{
3515   predicate(n->as_Vector()->length() == 2);
3516   match(Set dst (ReplicateL src));
3517   format %{ "movdq   $dst,$src\n\t"
3518             "punpcklqdq $dst,$dst\t! replicate2L" %}
3519   ins_encode %{
3520     __ movdq($dst$$XMMRegister, $src$$Register);
3521     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3522   %}
3523   ins_pipe( pipe_slow );
3524 %}
3525 
3526 instruct Repl4L(vecY dst, rRegL src) %{
3527   predicate(n->as_Vector()->length() == 4);
3528   match(Set dst (ReplicateL src));
3529   format %{ "movdq   $dst,$src\n\t"
3530             "punpcklqdq $dst,$dst\n\t"
3531             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3532   ins_encode %{
3533     __ movdq($dst$$XMMRegister, $src$$Register);
3534     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3535     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3536   %}
3537   ins_pipe( pipe_slow );
3538 %}
3539 
3540 instruct Repl8L(vecZ dst, rRegL src) %{
3541   predicate(n->as_Vector()->length() == 8);
3542   match(Set dst (ReplicateL src));
3543   format %{ "movdq   $dst,$src\n\t"
3544             "punpcklqdq $dst,$dst\n\t"
3545             "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
3546             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
3547   ins_encode %{
3548     __ movdq($dst$$XMMRegister, $src$$Register);
3549     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3550     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3551     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3552   %}
3553   ins_pipe( pipe_slow );
3554 %}
3555 #else // _LP64
3556 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
3557   predicate(n->as_Vector()->length() == 2);
3558   match(Set dst (ReplicateL src));
3559   effect(TEMP dst, USE src, TEMP tmp);
3560   format %{ "movdl   $dst,$src.lo\n\t"
3561             "movdl   $tmp,$src.hi\n\t"
3562             "punpckldq $dst,$tmp\n\t"
3563             "punpcklqdq $dst,$dst\t! replicate2L"%}
3564   ins_encode %{
3565     __ movdl($dst$$XMMRegister, $src$$Register);
3566     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3567     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3568     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3569   %}
3570   ins_pipe( pipe_slow );
3571 %}
3572 
3573 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
3574   predicate(n->as_Vector()->length() == 4);
3575   match(Set dst (ReplicateL src));
3576   effect(TEMP dst, USE src, TEMP tmp);
3577   format %{ "movdl   $dst,$src.lo\n\t"
3578             "movdl   $tmp,$src.hi\n\t"
3579             "punpckldq $dst,$tmp\n\t"
3580             "punpcklqdq $dst,$dst\n\t"
3581             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3582   ins_encode %{
3583     __ movdl($dst$$XMMRegister, $src$$Register);
3584     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3585     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3586     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3587     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3588   %}
3589   ins_pipe( pipe_slow );
3590 %}
3591 
3592 instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{
3593   predicate(n->as_Vector()->length() == 8);
3594   match(Set dst (ReplicateL src));
3595   effect(TEMP dst, USE src, TEMP tmp);
3596   format %{ "movdl   $dst,$src.lo\n\t"
3597             "movdl   $tmp,$src.hi\n\t"
3598             "punpckldq $dst,$tmp\n\t"
                 "punpcklqdq $dst,$dst\n\t"
3599             "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
3600             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
3601   ins_encode %{
3602     __ movdl($dst$$XMMRegister, $src$$Register);
3603     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3604     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3605     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3606     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3607     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3608   %}
3609   ins_pipe( pipe_slow );
3610 %}
3611 #endif // _LP64
3612 
3613 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
3614 instruct Repl2L_imm(vecX dst, immL con) %{
3615   predicate(n->as_Vector()->length() == 2);
3616   match(Set dst (ReplicateL con));
3617   format %{ "movq    $dst,[$constantaddress]\n\t"
3618             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
3619   ins_encode %{
3620     __ movq($dst$$XMMRegister, $constantaddress($con));
3621     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3622   %}
3623   ins_pipe( pipe_slow );
3624 %}
3625 
3626 instruct Repl4L_imm(vecY dst, immL con) %{
3627   predicate(n->as_Vector()->length() == 4);
3628   match(Set dst (ReplicateL con));
3629   format %{ "movq    $dst,[$constantaddress]\n\t"
3630             "punpcklqdq $dst,$dst\n\t"
3631             "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
3632   ins_encode %{
3633     __ movq($dst$$XMMRegister, $constantaddress($con));
3634     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3635     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3636   %}
3637   ins_pipe( pipe_slow );
3638 %}
3639 
3640 instruct Repl8L_imm(vecZ dst, immL con) %{
3641   predicate(n->as_Vector()->length() == 8);
3642   match(Set dst (ReplicateL con));
3643   format %{ "movq    $dst,[$constantaddress]\n\t"
3644             "punpcklqdq $dst,$dst\n\t"
3645             "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t"
3646             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %}
3647   ins_encode %{
3648     __ movq($dst$$XMMRegister, $constantaddress($con));
3649     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3650     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3651     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3652   %}
3653   ins_pipe( pipe_slow );
3654 %}
3655 
3656 // Long could be loaded into xmm register directly from memory.
3657 instruct Repl2L_mem(vecX dst, memory mem) %{
3658   predicate(n->as_Vector()->length() == 2);
3659   match(Set dst (ReplicateL (LoadL mem)));
3660   format %{ "movq    $dst,$mem\n\t"
3661             "punpcklqdq $dst,$dst\t! replicate2L" %}
3662   ins_encode %{
3663     __ movq($dst$$XMMRegister, $mem$$Address);
3664     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3665   %}
3666   ins_pipe( pipe_slow );
3667 %}
3668 
3669 instruct Repl4L_mem(vecY dst, memory mem) %{
3670   predicate(n->as_Vector()->length() == 4);
3671   match(Set dst (ReplicateL (LoadL mem)));
3672   format %{ "movq    $dst,$mem\n\t"
3673             "punpcklqdq $dst,$dst\n\t"
3674             "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3675   ins_encode %{
3676     __ movq($dst$$XMMRegister, $mem$$Address);
3677     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3678     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3679   %}
3680   ins_pipe( pipe_slow );
3681 %}
3682 
3683 instruct Repl8L_mem(vecZ dst, memory mem) %{
3684   predicate(n->as_Vector()->length() == 8);
3685   match(Set dst (ReplicateL (LoadL mem)));
3686   format %{ "movq    $dst,$mem\n\t"
3687             "punpcklqdq $dst,$dst\n\t"
3688             "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
3689             "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
3690   ins_encode %{
3691     __ movq($dst$$XMMRegister, $mem$$Address);
3692     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3693     __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3694     __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3695   %}
3696   ins_pipe( pipe_slow );
3697 %}
3698 
3699 // Replicate long (8 byte) scalar zero to be vector
3700 instruct Repl2L_zero(vecX dst, immL0 zero) %{
3701   predicate(n->as_Vector()->length() == 2);
3702   match(Set dst (ReplicateL zero));
3703   format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
3704   ins_encode %{
3705     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3706   %}
3707   ins_pipe( fpu_reg_reg );
3708 %}
3709 
3710 instruct Repl4L_zero(vecY dst, immL0 zero) %{
3711   predicate(n->as_Vector()->length() == 4);
3712   match(Set dst (ReplicateL zero));
3713   format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
3714   ins_encode %{
3715     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
3716     int vector_len = 1;
3717     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3718   %}
3719   ins_pipe( fpu_reg_reg );
3720 %}
3721 
3722 instruct Repl8L_zero(vecZ dst, immL0 zero) %{
3723   predicate(n->as_Vector()->length() == 8);
3724   match(Set dst (ReplicateL zero));
3725   format %{ "vpxor   $dst k0,$dst,$dst\t! replicate8L zero" %}
3726   ins_encode %{
3727     // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
3728     int vector_len = 2;
3729     __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3730   %}
3731   ins_pipe( fpu_reg_reg );
3732 %}
3733 
3734 // Replicate float (4 byte) scalar to be vector
3735 instruct Repl2F(vecD dst, regF src) %{
3736   predicate(n->as_Vector()->length() == 2);
3737   match(Set dst (ReplicateF src));
3738   format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
3739   ins_encode %{
3740     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3741   %}
3742   ins_pipe( fpu_reg_reg );
3743 %}
3744 
3745 instruct Repl4F(vecX dst, regF src) %{
3746   predicate(n->as_Vector()->length() == 4);
3747   match(Set dst (ReplicateF src));
3748   format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
3749   ins_encode %{
3750     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3751   %}
3752   ins_pipe( pipe_slow );
3753 %}
3754 
3755 instruct Repl8F(vecY dst, regF src) %{
3756   predicate(n->as_Vector()->length() == 8);
3757   match(Set dst (ReplicateF src));
3758   format %{ "pshufd  $dst,$src,0x00\n\t"
3759             "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
3760   ins_encode %{
3761     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3762     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3763   %}
3764   ins_pipe( pipe_slow );
3765 %}
3766 
3767 instruct Repl16F(vecZ dst, regF src) %{
3768   predicate(n->as_Vector()->length() == 16);
3769   match(Set dst (ReplicateF src));
3770   format %{ "pshufd  $dst,$src,0x00\n\t"
3771             "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t"
3772             "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate8F" %}
3773   ins_encode %{
3774     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3775     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3776     __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3777   %}
3778   ins_pipe( pipe_slow );
3779 %}
3780 
3781 // Replicate float (4 byte) scalar zero to be vector
3782 instruct Repl2F_zero(vecD dst, immF0 zero) %{
3783   predicate(n->as_Vector()->length() == 2);
3784   match(Set dst (ReplicateF zero));
3785   format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
3786   ins_encode %{
3787     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3788   %}
3789   ins_pipe( fpu_reg_reg );
3790 %}
3791 
3792 instruct Repl4F_zero(vecX dst, immF0 zero) %{
3793   predicate(n->as_Vector()->length() == 4);
3794   match(Set dst (ReplicateF zero));
3795   format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
3796   ins_encode %{
3797     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3798   %}
3799   ins_pipe( fpu_reg_reg );
3800 %}
3801 
3802 instruct Repl8F_zero(vecY dst, immF0 zero) %{
3803   predicate(n->as_Vector()->length() == 8);
3804   match(Set dst (ReplicateF zero));
3805   format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
3806   ins_encode %{
3807     int vector_len = 1;
3808     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3809   %}
3810   ins_pipe( fpu_reg_reg );
3811 %}
3812 
3813 instruct Repl16F_zero(vecZ dst, immF0 zero) %{
3814   predicate(n->as_Vector()->length() == 16);
3815   match(Set dst (ReplicateF zero));
3816   format %{ "vxorps  $dst k0,$dst,$dst\t! replicate16F zero" %}
3817   ins_encode %{
3818     int vector_len = 2;
3819     __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3820   %}
3821   ins_pipe( fpu_reg_reg );
3822 %}
3823 
3824 // Replicate double (8 bytes) scalar to be vector
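// For doubles the pshufd immediate is 0x44, i.e. dword selectors {0,1,0,1},
// which copies the low 64-bit element into both halves of the 128-bit
// register; the wider forms again fill the upper lanes with
// vinsertf128h / vinsertf64x4h.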
3825 instruct Repl2D(vecX dst, regD src) %{
3826   predicate(n->as_Vector()->length() == 2);
3827   match(Set dst (ReplicateD src));
3828   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
3829   ins_encode %{
3830     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3831   %}
3832   ins_pipe( pipe_slow );
3833 %}
3834 
3835 instruct Repl4D(vecY dst, regD src) %{
3836   predicate(n->as_Vector()->length() == 4);
3837   match(Set dst (ReplicateD src));
3838   format %{ "pshufd  $dst,$src,0x44\n\t"
3839             "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
3840   ins_encode %{
3841     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3842     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3843   %}
3844   ins_pipe( pipe_slow );
3845 %}
3846 
3847 instruct Repl8D(vecZ dst, regD src) %{
3848   predicate(n->as_Vector()->length() == 8);
3849   match(Set dst (ReplicateD src));
3850   format %{ "pshufd  $dst,$src,0x44\n\t"
3851             "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t"
3852             "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %}
3853   ins_encode %{
3854     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3855     __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3856     __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3857   %}
3858   ins_pipe( pipe_slow );
3859 %}
3860 
3861 // Replicate double (8 byte) scalar zero to be vector
3862 instruct Repl2D_zero(vecX dst, immD0 zero) %{
3863   predicate(n->as_Vector()->length() == 2);
3864   match(Set dst (ReplicateD zero));
3865   format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
3866   ins_encode %{
3867     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
3868   %}
3869   ins_pipe( fpu_reg_reg );
3870 %}
3871 
3872 instruct Repl4D_zero(vecY dst, immD0 zero) %{
3873   predicate(n->as_Vector()->length() == 4);
3874   match(Set dst (ReplicateD zero));
3875   format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
3876   ins_encode %{
3877     int vector_len = 1;
3878     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3879   %}
3880   ins_pipe( fpu_reg_reg );
3881 %}
3882 
3883 instruct Repl8D_zero(vecZ dst, immD0 zero) %{
3884   predicate(n->as_Vector()->length() == 8);
3885   match(Set dst (ReplicateD zero));
3886   format %{ "vxorpd  $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
3887   ins_encode %{
3888     int vector_len = 2;
3889     __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3890   %}
3891   ins_pipe( fpu_reg_reg );
3892 %}
3893 
3894 // ====================REDUCTION ARITHMETIC=======================================
3895 
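// A reduction node combines a scalar input with every element of a vector
// input and produces a scalar result. Illustrative scalar sketch (names are
// descriptive only, not HotSpot API):
//
//   int result = src1;
//   for (int i = 0; i < n; i++) result OP= src2[i];   // OP is add or mul below
//
// The integer forms below lower this by repeatedly folding the upper half of
// the vector into the lower half (vextract*/pshufd followed by a packed add or
// multiply), then combining the surviving element with src1 and moving it to a
// general-purpose register with movd/movdq.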
3896 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
3897   predicate(UseSSE > 2 && UseAVX == 0);
3898   match(Set dst (AddReductionVI src1 src2));
3899   effect(TEMP tmp2, TEMP tmp);
3900   format %{ "movdqu  $tmp2,$src2\n\t"
3901             "phaddd  $tmp2,$tmp2\n\t"
3902             "movd    $tmp,$src1\n\t"
3903             "paddd   $tmp,$tmp2\n\t"
3904             "movd    $dst,$tmp\t! add reduction2I" %}
3905   ins_encode %{
3906     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
3907     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
3908     __ movdl($tmp$$XMMRegister, $src1$$Register);
3909     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
3910     __ movdl($dst$$Register, $tmp$$XMMRegister);
3911   %}
3912   ins_pipe( pipe_slow );
3913 %}
3914 
3915 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
3916   predicate(UseAVX > 0 && UseAVX < 3);
3917   match(Set dst (AddReductionVI src1 src2));
3918   effect(TEMP tmp, TEMP tmp2);
3919   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
3920             "movd     $tmp2,$src1\n\t"
3921             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
3922             "movd     $dst,$tmp2\t! add reduction2I" %}
3923   ins_encode %{
3924     int vector_len = 0;
3925     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
3926     __ movdl($tmp2$$XMMRegister, $src1$$Register);
3927     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
3928     __ movdl($dst$$Register, $tmp2$$XMMRegister);
3929   %}
3930   ins_pipe( pipe_slow );
3931 %}
3932 
3933 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
3934   predicate(UseAVX > 2);
3935   match(Set dst (AddReductionVI src1 src2));
3936   effect(TEMP tmp, TEMP tmp2);
3937   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
3938             "vpaddd  $tmp,$src2,$tmp2\n\t"
3939             "movd    $tmp2,$src1\n\t"
3940             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
3941             "movd    $dst,$tmp2\t! add reduction2I" %}
3942   ins_encode %{
3943     int vector_len = 0;
3944     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
3945     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
3946     __ movdl($tmp2$$XMMRegister, $src1$$Register);
3947     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
3948     __ movdl($dst$$Register, $tmp2$$XMMRegister);
3949   %}
3950   ins_pipe( pipe_slow );
3951 %}
3952 
3953 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
3954   predicate(UseSSE > 2 && UseAVX == 0);
3955   match(Set dst (AddReductionVI src1 src2));
3956   effect(TEMP tmp2, TEMP tmp);
3957   format %{ "movdqu  $tmp2,$src2\n\t"
3958             "phaddd  $tmp2,$tmp2\n\t"
3959             "phaddd  $tmp2,$tmp2\n\t"
3960             "movd    $tmp,$src1\n\t"
3961             "paddd   $tmp,$tmp2\n\t"
3962             "movd    $dst,$tmp\t! add reduction4I" %}
3963   ins_encode %{
3964     __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
3965     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
3966     __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
3967     __ movdl($tmp$$XMMRegister, $src1$$Register);
3968     __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
3969     __ movdl($dst$$Register, $tmp$$XMMRegister);
3970   %}
3971   ins_pipe( pipe_slow );
3972 %}
3973 
3974 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
3975   predicate(UseAVX > 0 && UseAVX < 3);
3976   match(Set dst (AddReductionVI src1 src2));
3977   effect(TEMP tmp, TEMP tmp2);
3978   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
3979             "vphaddd  $tmp,$tmp,$tmp2\n\t"
3980             "movd     $tmp2,$src1\n\t"
3981             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
3982             "movd     $dst,$tmp2\t! add reduction4I" %}
3983   ins_encode %{
3984     int vector_len = 0;
3985     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
3986     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
3987     __ movdl($tmp2$$XMMRegister, $src1$$Register);
3988     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
3989     __ movdl($dst$$Register, $tmp2$$XMMRegister);
3990   %}
3991   ins_pipe( pipe_slow );
3992 %}
3993 
3994 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
3995   predicate(UseAVX > 2);
3996   match(Set dst (AddReductionVI src1 src2));
3997   effect(TEMP tmp, TEMP tmp2);
3998   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
3999             "vpaddd  $tmp,$src2,$tmp2\n\t"
4000             "pshufd  $tmp2,$tmp,0x1\n\t"
4001             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4002             "movd    $tmp2,$src1\n\t"
4003             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4004             "movd    $dst,$tmp2\t! add reduction4I" %}
4005   ins_encode %{
4006     int vector_len = 0;
4007     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4008     __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4009     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4010     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4011     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4012     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4013     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4014   %}
4015   ins_pipe( pipe_slow );
4016 %}
4017 
4018 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4019   predicate(UseAVX > 0 && UseAVX < 3);
4020   match(Set dst (AddReductionVI src1 src2));
4021   effect(TEMP tmp, TEMP tmp2);
4022   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
4023             "vphaddd  $tmp,$tmp,$tmp2\n\t"
4024             "vextracti128  $tmp2,$tmp\n\t"
4025             "vpaddd   $tmp,$tmp,$tmp2\n\t"
4026             "movd     $tmp2,$src1\n\t"
4027             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
4028             "movd     $dst,$tmp2\t! add reduction8I" %}
4029   ins_encode %{
4030     int vector_len = 1;
4031     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
4032     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4033     __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
4034     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4035     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4036     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4037     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4038   %}
4039   ins_pipe( pipe_slow );
4040 %}
4041 
4042 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4043   predicate(UseAVX > 2);
4044   match(Set dst (AddReductionVI src1 src2));
4045   effect(TEMP tmp, TEMP tmp2);
4046   format %{ "vextracti128  $tmp,$src2\n\t"
4047             "vpaddd  $tmp,$tmp,$src2\n\t"
4048             "pshufd  $tmp2,$tmp,0xE\n\t"
4049             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4050             "pshufd  $tmp2,$tmp,0x1\n\t"
4051             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4052             "movd    $tmp2,$src1\n\t"
4053             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4054             "movd    $dst,$tmp2\t! add reduction8I" %}
4055   ins_encode %{
4056     int vector_len = 0;
4057     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
4058     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4059     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4060     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4061     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4062     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4063     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4064     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4065     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4066   %}
4067   ins_pipe( pipe_slow );
4068 %}
4069 
4070 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4071   predicate(UseAVX > 2);
4072   match(Set dst (AddReductionVI src1 src2));
4073   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4074   format %{ "vextracti64x4  $tmp3,$src2\n\t"
4075             "vpaddd  $tmp3,$tmp3,$src2\n\t"
4076             "vextracti128   $tmp,$tmp3\n\t"
4077             "vpaddd  $tmp,$tmp,$tmp3\n\t"
4078             "pshufd  $tmp2,$tmp,0xE\n\t"
4079             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4080             "pshufd  $tmp2,$tmp,0x1\n\t"
4081             "vpaddd  $tmp,$tmp,$tmp2\n\t"
4082             "movd    $tmp2,$src1\n\t"
4083             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
4084             "movd    $dst,$tmp2\t! mul reduction16I" %}
4085   ins_encode %{
4086     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
4087     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
4088     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
4089     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
4090     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4091     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4092     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4093     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4094     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4095     __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4096     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4097   %}
4098   ins_pipe( pipe_slow );
4099 %}
4100 
4101 #ifdef _LP64
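// Long reductions move 64-bit values between general-purpose and XMM
// registers with movdq, so they are only available on 64-bit (_LP64) builds.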
4102 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
4103   predicate(UseAVX > 2);
4104   match(Set dst (AddReductionVL src1 src2));
4105   effect(TEMP tmp, TEMP tmp2);
4106   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4107             "vpaddq  $tmp,$src2,$tmp2\n\t"
4108             "movdq   $tmp2,$src1\n\t"
4109             "vpaddq  $tmp2,$tmp,$tmp2\n\t"
4110             "movdq   $dst,$tmp2\t! add reduction2L" %}
4111   ins_encode %{
4112     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4113     __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
4114     __ movdq($tmp2$$XMMRegister, $src1$$Register);
4115     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4116     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4117   %}
4118   ins_pipe( pipe_slow );
4119 %}
4120 
4121 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
4122   predicate(UseAVX > 2);
4123   match(Set dst (AddReductionVL src1 src2));
4124   effect(TEMP tmp, TEMP tmp2);
4125   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
4126             "vpaddq  $tmp2,$tmp,$src2\n\t"
4127             "pshufd  $tmp,$tmp2,0xE\n\t"
4128             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4129             "movdq   $tmp,$src1\n\t"
4130             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4131             "movdq   $dst,$tmp2\t! add reduction4L" %}
4132   ins_encode %{
4133     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
4134     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
4135     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4136     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4137     __ movdq($tmp$$XMMRegister, $src1$$Register);
4138     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4139     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4140   %}
4141   ins_pipe( pipe_slow );
4142 %}
4143 
4144 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
4145   predicate(UseAVX > 2);
4146   match(Set dst (AddReductionVL src1 src2));
4147   effect(TEMP tmp, TEMP tmp2);
4148   format %{ "vextracti64x4  $tmp2,$src2\n\t"
4149             "vpaddq  $tmp2,$tmp2,$src2\n\t"
4150             "vextracti128   $tmp,$tmp2\n\t"
4151             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4152             "pshufd  $tmp,$tmp2,0xE\n\t"
4153             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4154             "movdq   $tmp,$src1\n\t"
4155             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
4156             "movdq   $dst,$tmp2\t! add reduction8L" %}
4157   ins_encode %{
4158     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
4159     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
4160     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
4161     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4162     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4163     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4164     __ movdq($tmp$$XMMRegister, $src1$$Register);
4165     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4166     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4167   %}
4168   ins_pipe( pipe_slow );
4169 %}
4170 #endif
4171 
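// Floating-point reductions are expanded as a strictly ordered chain of scalar
// adds (addss/addsd and their AVX three-operand forms): FP addition is not
// associative, so the elements are combined one at a time, in order, rather
// than with packed horizontal adds.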
4172 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4173   predicate(UseSSE >= 1 && UseAVX == 0);
4174   match(Set dst (AddReductionVF src1 src2));
4175   effect(TEMP tmp, TEMP tmp2);
4176   format %{ "movdqu  $tmp,$src1\n\t"
4177             "addss   $tmp,$src2\n\t"
4178             "pshufd  $tmp2,$src2,0x01\n\t"
4179             "addss   $tmp,$tmp2\n\t"
4180             "movdqu  $dst,$tmp\t! add reduction2F" %}
4181   ins_encode %{
4182     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4183     __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
4184     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4185     __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4186     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4187   %}
4188   ins_pipe( pipe_slow );
4189 %}
4190 
4191 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{


4275   ins_encode %{
4276     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4277     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4278     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4279     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4280     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4281     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4282     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4283     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4284     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4285     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4286     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4287     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4288     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4289     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4290     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4291   %}
4292   ins_pipe( pipe_slow );
4293 %}
4294 
4295 instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4296   predicate(UseAVX > 2);
4297   match(Set dst (AddReductionVF src1 src2));
4298   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4299   format %{ "vaddss  $tmp2,$src1,$src2\n\t"
4300             "pshufd  $tmp,$src2,0x01\n\t"
4301             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4302             "pshufd  $tmp,$src2,0x02\n\t"
4303             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4304             "pshufd  $tmp,$src2,0x03\n\t"
4305             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4306             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
4307             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4308             "pshufd  $tmp,$tmp3,0x01\n\t"
4309             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4310             "pshufd  $tmp,$tmp3,0x02\n\t"
4311             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4312             "pshufd  $tmp,$tmp3,0x03\n\t"
4313             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4314             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
4315             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4316             "pshufd  $tmp,$tmp3,0x01\n\t"
4317             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4318             "pshufd  $tmp,$tmp3,0x02\n\t"
4319             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4320             "pshufd  $tmp,$tmp3,0x03\n\t"
4321             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4322             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
4323             "vaddss  $tmp2,$tmp2,$tmp3\n\t"
4324             "pshufd  $tmp,$tmp3,0x01\n\t"
4325             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4326             "pshufd  $tmp,$tmp3,0x02\n\t"
4327             "vaddss  $tmp2,$tmp2,$tmp\n\t"
4328             "pshufd  $tmp,$tmp3,0x03\n\t"
4329             "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
4330   ins_encode %{
4331     __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4332     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4333     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4334     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4335     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4336     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4337     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4338     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4339     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4340     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4341     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4342     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4343     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4344     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4345     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4346     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4347     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4348     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4349     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4350     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4351     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4352     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4353     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4354     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4355     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4356     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4357     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4358     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4359     __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4360     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4361     __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4362   %}
4363   ins_pipe( pipe_slow );
4364 %}
4365 
4366 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
4367   predicate(UseSSE >= 1 && UseAVX == 0);
4368   match(Set dst (AddReductionVD src1 src2));
4369   effect(TEMP tmp, TEMP dst);
4370   format %{ "movdqu  $tmp,$src1\n\t"
4371             "addsd   $tmp,$src2\n\t"
4372             "pshufd  $dst,$src2,0xE\n\t"
4373             "addsd   $dst,$tmp\t! add reduction2D" %}
4374   ins_encode %{
4375     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4376     __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
4377     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
4378     __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
4379   %}
4380   ins_pipe( pipe_slow );
4381 %}
4382 
4383 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
4384   predicate(UseAVX > 0);
4385   match(Set dst (AddReductionVD src1 src2));


4393     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4394   %}
4395   ins_pipe( pipe_slow );
4396 %}
4397 
4398 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
4399   predicate(UseAVX > 0);
4400   match(Set dst (AddReductionVD src1 src2));
4401   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4402   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4403             "pshufd  $tmp,$src2,0xE\n\t"
4404             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4405             "vextractf128  $tmp3,$src2\n\t"
4406             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4407             "pshufd  $tmp,$tmp3,0xE\n\t"
4408             "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
4409   ins_encode %{
4410     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4411     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4412     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4413     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4414     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4415     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4416     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4417   %}
4418   ins_pipe( pipe_slow );
4419 %}
4420 
4421 instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
4422   predicate(UseAVX > 2);
4423   match(Set dst (AddReductionVD src1 src2));
4424   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4425   format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
4426             "pshufd  $tmp,$src2,0xE\n\t"
4427             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4428             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
4429             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4430             "pshufd  $tmp,$tmp3,0xE\n\t"
4431             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4432             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
4433             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4434             "pshufd  $tmp,$tmp3,0xE\n\t"
4435             "vaddsd  $tmp2,$tmp2,$tmp\n\t"
4436             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
4437             "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
4438             "pshufd  $tmp,$tmp3,0xE\n\t"
4439             "vaddsd  $dst,$tmp2,$tmp\t! add reduction8D" %}
4440   ins_encode %{
4441     __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4442     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4443     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4444     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4445     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4446     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4447     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4448     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4449     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4450     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4451     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4452     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4453     __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4454     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4455     __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4456   %}
4457   ins_pipe( pipe_slow );
4458 %}
4459 
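// Integer multiply reductions mirror the add reductions above but combine
// elements with pmulld/vpmulld; packed dword multiply first appeared in
// SSE4.1, hence the UseSSE > 3 predicates on the non-AVX forms.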
4460 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4461   predicate(UseSSE > 3 && UseAVX == 0);
4462   match(Set dst (MulReductionVI src1 src2));
4463   effect(TEMP tmp, TEMP tmp2);
4464   format %{ "pshufd  $tmp2,$src2,0x1\n\t"
4465             "pmulld  $tmp2,$src2\n\t"
4466             "movd    $tmp,$src1\n\t"
4467             "pmulld  $tmp2,$tmp\n\t"
4468             "movd    $dst,$tmp2\t! mul reduction2I" %}
4469   ins_encode %{
4470     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4471     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
4472     __ movdl($tmp$$XMMRegister, $src1$$Register);
4473     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4474     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4475   %}
4476   ins_pipe( pipe_slow );
4477 %}
4478 
4479 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
4480   predicate(UseAVX > 0);
4481   match(Set dst (MulReductionVI src1 src2));
4482   effect(TEMP tmp, TEMP tmp2);
4483   format %{ "pshufd   $tmp2,$src2,0x1\n\t"
4484             "vpmulld  $tmp,$src2,$tmp2\n\t"
4485             "movd     $tmp2,$src1\n\t"
4486             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4487             "movd     $dst,$tmp2\t! mul reduction2I" %}
4488   ins_encode %{
4489     int vector_len = 0;
4490     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
4491     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4492     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4493     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4494     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4495   %}
4496   ins_pipe( pipe_slow );
4497 %}
4498 
4499 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4500   predicate(UseSSE > 3 && UseAVX == 0);
4501   match(Set dst (MulReductionVI src1 src2));
4502   effect(TEMP tmp, TEMP tmp2);
4503   format %{ "pshufd  $tmp2,$src2,0xE\n\t"
4504             "pmulld  $tmp2,$src2\n\t"
4505             "pshufd  $tmp,$tmp2,0x1\n\t"
4506             "pmulld  $tmp2,$tmp\n\t"
4507             "movd    $tmp,$src1\n\t"
4508             "pmulld  $tmp2,$tmp\n\t"
4509             "movd    $dst,$tmp2\t! mul reduction4I" %}
4510   ins_encode %{
4511     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4512     __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
4513     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
4514     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4515     __ movdl($tmp$$XMMRegister, $src1$$Register);
4516     __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
4517     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4518   %}
4519   ins_pipe( pipe_slow );
4520 %}
4521 
4522 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
4523   predicate(UseAVX > 0);
4524   match(Set dst (MulReductionVI src1 src2));
4525   effect(TEMP tmp, TEMP tmp2);
4526   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
4527             "vpmulld  $tmp,$src2,$tmp2\n\t"
4528             "pshufd   $tmp2,$tmp,0x1\n\t"
4529             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4530             "movd     $tmp2,$src1\n\t"
4531             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4532             "movd     $dst,$tmp2\t! mul reduction4I" %}
4533   ins_encode %{
4534     int vector_len = 0;
4535     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4536     __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4537     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4538     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4539     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4540     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4541     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4542   %}
4543   ins_pipe( pipe_slow );
4544 %}
4545 
4546 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
4547   predicate(UseAVX > 0);
4548   match(Set dst (MulReductionVI src1 src2));
4549   effect(TEMP tmp, TEMP tmp2);
4550   format %{ "vextracti128  $tmp,$src2\n\t"
4551             "vpmulld  $tmp,$tmp,$src2\n\t"
4552             "pshufd   $tmp2,$tmp,0xE\n\t"
4553             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4554             "pshufd   $tmp2,$tmp,0x1\n\t"
4555             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4556             "movd     $tmp2,$src1\n\t"
4557             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4558             "movd     $dst,$tmp2\t! mul reduction8I" %}
4559   ins_encode %{
4560     int vector_len = 0;
4561     __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
4562     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
4563     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4564     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4565     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4566     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4567     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4568     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
4569     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4570   %}
4571   ins_pipe( pipe_slow );
4572 %}
4573 
4574 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4575   predicate(UseAVX > 2);
4576   match(Set dst (MulReductionVI src1 src2));
4577   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4578   format %{ "vextracti64x4  $tmp3,$src2\n\t"
4579             "vpmulld  $tmp3,$tmp3,$src2\n\t"
4580             "vextracti128   $tmp,$tmp3\n\t"
4581             "vpmulld  $tmp,$tmp,$src2\n\t"
4582             "pshufd   $tmp2,$tmp,0xE\n\t"
4583             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4584             "pshufd   $tmp2,$tmp,0x1\n\t"
4585             "vpmulld  $tmp,$tmp,$tmp2\n\t"
4586             "movd     $tmp2,$src1\n\t"
4587             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
4588             "movd     $dst,$tmp2\t! mul reduction16I" %}
4589   ins_encode %{
4590     __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
4591     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
4592     __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
4593     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
4594     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
4595     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4596     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
4597     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4598     __ movdl($tmp2$$XMMRegister, $src1$$Register);
4599     __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4600     __ movdl($dst$$Register, $tmp2$$XMMRegister);
4601   %}
4602   ins_pipe( pipe_slow );
4603 %}
4604 
4605 #ifdef _LP64
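// Long multiply reductions additionally need the AVX-512DQ vpmullq
// instruction, which is why these forms are guarded by
// VM_Version::supports_avx512dq().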
4606 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
4607   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
4608   match(Set dst (MulReductionVL src1 src2));
4609   effect(TEMP tmp, TEMP tmp2);
4610   format %{ "pshufd   $tmp2,$src2,0xE\n\t"
4611             "vpmullq  $tmp,$src2,$tmp2\n\t"
4612             "movdq    $tmp2,$src1\n\t"
4613             "vpmullq  $tmp2,$tmp,$tmp2\n\t"
4614             "movdq    $dst,$tmp2\t! mul reduction2L" %}
4615   ins_encode %{
4616     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
4617     __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
4618     __ movdq($tmp2$$XMMRegister, $src1$$Register);
4619     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
4620     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4621   %}
4622   ins_pipe( pipe_slow );
4623 %}
4624 
4625 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
4626   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
4627   match(Set dst (MulReductionVL src1 src2));
4628   effect(TEMP tmp, TEMP tmp2);
4629   format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
4630             "vpmullq  $tmp2,$tmp,$src2\n\t"
4631             "pshufd   $tmp,$tmp2,0xE\n\t"
4632             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4633             "movdq    $tmp,$src1\n\t"
4634             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4635             "movdq    $dst,$tmp2\t! mul reduction4L" %}
4636   ins_encode %{
4637     __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
4638     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
4639     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4640     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4641     __ movdq($tmp$$XMMRegister, $src1$$Register);
4642     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4643     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4644   %}
4645   ins_pipe( pipe_slow );
4646 %}
4647 
4648 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
4649   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
4650   match(Set dst (MulReductionVL src1 src2));
4651   effect(TEMP tmp, TEMP tmp2);
4652   format %{ "vextracti64x4  $tmp2,$src2\n\t"
4653             "vpmullq  $tmp2,$tmp2,$src2\n\t"
4654             "vextracti128   $tmp,$tmp2\n\t"
4655             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4656             "pshufd   $tmp,$tmp2,0xE\n\t"
4657             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4658             "movdq    $tmp,$src1\n\t"
4659             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
4660             "movdq    $dst,$tmp2\t! mul reduction8L" %}
4661   ins_encode %{
4662     __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
4663     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
4664     __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
4665     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4666     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
4667     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4668     __ movdq($tmp$$XMMRegister, $src1$$Register);
4669     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
4670     __ movdq($dst$$Register, $tmp2$$XMMRegister);
4671   %}
4672   ins_pipe( pipe_slow );
4673 %}
4674 #endif
4675 
4676 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4677   predicate(UseSSE >= 1 && UseAVX == 0);
4678   match(Set dst (MulReductionVF src1 src2));
4679   effect(TEMP tmp, TEMP tmp2);
4680   format %{ "movdqu  $tmp,$src1\n\t"
4681             "mulss   $tmp,$src2\n\t"
4682             "pshufd  $tmp2,$src2,0x01\n\t"
4683             "mulss   $tmp,$tmp2\n\t"
4684             "movdqu  $dst,$tmp\t! mul reduction2F" %}
4685   ins_encode %{
4686     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4687     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
4688     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4689     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4690     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4691   %}
4692   ins_pipe( pipe_slow );
4693 %}
4694 
4695 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4696   predicate(UseAVX > 0);
4697   match(Set dst (MulReductionVF src1 src2));
4698   effect(TEMP tmp, TEMP tmp2);
4699   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
4700             "pshufd  $tmp,$src2,0x01\n\t"
4701             "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
4702   ins_encode %{
4703     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4704     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4705     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4706   %}
4707   ins_pipe( pipe_slow );
4708 %}
4709 
4710 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4711   predicate(UseSSE >= 1 && UseAVX == 0);
4712   match(Set dst (MulReductionVF src1 src2));
4713   effect(TEMP tmp, TEMP tmp2);
4714   format %{ "movdqu  $tmp,$src1\n\t"
4715             "mulss   $tmp,$src2\n\t"
4716             "pshufd  $tmp2,$src2,0x01\n\t"
4717             "mulss   $tmp,$tmp2\n\t"
4718             "pshufd  $tmp2,$src2,0x02\n\t"
4719             "mulss   $tmp,$tmp2\n\t"
4720             "pshufd  $tmp2,$src2,0x03\n\t"
4721             "mulss   $tmp,$tmp2\n\t"
4722             "movdqu  $dst,$tmp\t! mul reduction4F" %}
4723   ins_encode %{
4724     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4725     __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
4726     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
4727     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4728     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
4729     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4730     __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
4731     __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
4732     __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
4733   %}
4734   ins_pipe( pipe_slow );
4735 %}
4736 
4737 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
4738   predicate(UseAVX > 0);
4739   match(Set dst (MulReductionVF src1 src2));
4740   effect(TEMP tmp, TEMP tmp2);
4741   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
4742             "pshufd  $tmp,$src2,0x01\n\t"
4743             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4744             "pshufd  $tmp,$src2,0x02\n\t"
4745             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4746             "pshufd  $tmp,$src2,0x03\n\t"
4747             "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
4748   ins_encode %{
4749     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4750     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4751     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4752     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4753     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4754     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4755     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4756   %}
4757   ins_pipe( pipe_slow );
4758 %}
4759 
4760 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
4761   predicate(UseAVX > 0);
4762   match(Set dst (MulReductionVF src1 src2));
4763   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4764   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
4765             "pshufd  $tmp,$src2,0x01\n\t"
4766             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4767             "pshufd  $tmp,$src2,0x02\n\t"


4779   ins_encode %{
4780     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4781     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4782     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4783     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4784     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4785     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4786     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4787     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4788     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4789     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4790     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4791     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4792     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4793     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4794     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4795   %}
4796   ins_pipe( pipe_slow );
4797 %}
4798 
4799 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
4800   predicate(UseAVX > 2);
4801   match(Set dst (MulReductionVF src1 src2));
4802   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4803   format %{ "vmulss  $tmp2,$src1,$src2\n\t"
4804             "pshufd  $tmp,$src2,0x01\n\t"
4805             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4806             "pshufd  $tmp,$src2,0x02\n\t"
4807             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4808             "pshufd  $tmp,$src2,0x03\n\t"
4809             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4810             "vextractf32x4  $tmp3,$src2, 0x1\n\t"
4811             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
4812             "pshufd  $tmp,$tmp3,0x01\n\t"
4813             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4814             "pshufd  $tmp,$tmp3,0x02\n\t"
4815             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4816             "pshufd  $tmp,$tmp3,0x03\n\t"
4817             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4818             "vextractf32x4  $tmp3,$src2, 0x2\n\t"
4819             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
4820             "pshufd  $tmp,$tmp3,0x01\n\t"
4821             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4822             "pshufd  $tmp,$tmp3,0x02\n\t"
4823             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4824             "pshufd  $tmp,$tmp3,0x03\n\t"
4825             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4826             "vextractf32x4  $tmp3,$src2, 0x3\n\t"
4827             "vmulss  $tmp2,$tmp2,$tmp3\n\t"
4828             "pshufd  $tmp,$tmp3,0x01\n\t"
4829             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4830             "pshufd  $tmp,$tmp3,0x02\n\t"
4831             "vmulss  $tmp2,$tmp2,$tmp\n\t"
4832             "pshufd  $tmp,$tmp3,0x03\n\t"
4833             "vmulss  $dst,$tmp2,$tmp\t! mul reduction16F" %}
4834   ins_encode %{
4835     __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4836     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4837     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4838     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4839     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4840     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4841     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4842     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4843     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4844     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4845     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4846     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4847     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4848     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4849     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4850     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4851     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4852     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4853     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4854     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4855     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4856     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4857     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4858     __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4859     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4860     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4861     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4862     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4863     __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4864     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4865     __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4866   %}
4867   ins_pipe( pipe_slow );
4868 %}
4869 
4870 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
4871   predicate(UseSSE >= 1 && UseAVX == 0);
4872   match(Set dst (MulReductionVD src1 src2));
4873   effect(TEMP tmp, TEMP dst);
4874   format %{ "movdqu  $tmp,$src1\n\t"
4875             "mulsd   $tmp,$src2\n\t"
4876             "pshufd  $dst,$src2,0xE\n\t"
4877             "mulsd   $dst,$tmp\t! mul reduction2D" %}
4878   ins_encode %{
4879     __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4880     __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
4881     __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
4882     __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
4883   %}
4884   ins_pipe( pipe_slow );
4885 %}
4886 
4887 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
4888   predicate(UseAVX > 0);
4889   match(Set dst (MulReductionVD src1 src2));
4890   effect(TEMP tmp, TEMP tmp2);
4891   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
4892             "pshufd  $tmp,$src2,0xE\n\t"
4893             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
4894   ins_encode %{
4895     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4896     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4897     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);


4905   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4906   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
4907             "pshufd  $tmp,$src2,0xE\n\t"
4908             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
4909             "vextractf128  $tmp3,$src2\n\t"
4910             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
4911             "pshufd  $tmp,$tmp3,0xE\n\t"
4912             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
4913   ins_encode %{
4914     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4915     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4916     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4917     __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4918     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4919     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4920     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4921   %}
4922   ins_pipe( pipe_slow );
4923 %}
4924 
4925 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
4926   predicate(UseAVX > 2);
4927   match(Set dst (MulReductionVD src1 src2));
4928   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4929   format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
4930             "pshufd  $tmp,$src2,0xE\n\t"
4931             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
4932             "vextractf64x2  $tmp3,$src2, 0x1\n\t"
4933             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
4934             "pshufd  $tmp,$src2,0xE\n\t"
4935             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
4936             "vextractf64x2  $tmp3,$src2, 0x2\n\t"
4937             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
4938             "pshufd  $tmp,$tmp3,0xE\n\t"
4939             "vmulsd  $tmp2,$tmp2,$tmp\n\t"
4940             "vextractf64x2  $tmp3,$src2, 0x3\n\t"
4941             "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
4942             "pshufd  $tmp,$tmp3,0xE\n\t"
4943             "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
4944   ins_encode %{
4945     __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4946     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4947     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4948     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4949     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4950     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4951     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4952     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4953     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4954     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4955     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4956     __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4957     __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4958     __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4959     __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4960   %}
4961   ins_pipe( pipe_slow );
4962 %}
4963 
4964 // ====================VECTOR ARITHMETIC=======================================
4965 
4966 // --------------------------------- ADD --------------------------------------
4967 
4968 // Bytes vector add
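// Each element size/width pairing below follows the same pattern: a
// two-operand destructive SSE form (dst = dst + src), an AVX three-operand
// register form, and, for the wider vectors, an AVX form with a folded memory
// operand; vector_len again selects the 128/256/512-bit encoding.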
4969 instruct vadd4B(vecS dst, vecS src) %{
4970   predicate(n->as_Vector()->length() == 4);
4971   match(Set dst (AddVB dst src));
4972   format %{ "paddb   $dst,$src\t! add packed4B" %}
4973   ins_encode %{
4974     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
4975   %}
4976   ins_pipe( pipe_slow );
4977 %}
4978 
4979 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
4980   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4981   match(Set dst (AddVB src1 src2));
4982   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
4983   ins_encode %{
4984     int vector_len = 0;
4985     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4986   %}
4987   ins_pipe( pipe_slow );
4988 %}
4989 
4990 instruct vadd8B(vecD dst, vecD src) %{
4991   predicate(n->as_Vector()->length() == 8);
4992   match(Set dst (AddVB dst src));
4993   format %{ "paddb   $dst,$src\t! add packed8B" %}
4994   ins_encode %{
4995     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
4996   %}
4997   ins_pipe( pipe_slow );
4998 %}
4999 
5000 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
5001   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5002   match(Set dst (AddVB src1 src2));
5003   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
5004   ins_encode %{
5005     int vector_len = 0;
5006     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5007   %}
5008   ins_pipe( pipe_slow );
5009 %}
5010 
5011 instruct vadd16B(vecX dst, vecX src) %{
5012   predicate(n->as_Vector()->length() == 16);
5013   match(Set dst (AddVB dst src));
5014   format %{ "paddb   $dst,$src\t! add packed16B" %}
5015   ins_encode %{
5016     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5017   %}
5018   ins_pipe( pipe_slow );
5019 %}
5020 
5021 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
5022   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5023   match(Set dst (AddVB src1 src2));
5024   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
5025   ins_encode %{
5026     int vector_len = 0;
5027     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5028   %}
5029   ins_pipe( pipe_slow );
5030 %}
5031 
5032 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
5033   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5034   match(Set dst (AddVB src (LoadVector mem)));
5035   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
5036   ins_encode %{
5037     int vector_len = 0;
5038     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5039   %}
5040   ins_pipe( pipe_slow );
5041 %}
5042 
5043 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
5044   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5045   match(Set dst (AddVB src1 src2));
5046   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
5047   ins_encode %{
5048     int vector_len = 1;
5049     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5050   %}
5051   ins_pipe( pipe_slow );
5052 %}
5053 
5054 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
5055   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5056   match(Set dst (AddVB src (LoadVector mem)));
5057   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
5058   ins_encode %{
5059     int vector_len = 1;
5060     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5061   %}
5062   ins_pipe( pipe_slow );
5063 %}
5064 
5065 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
5066   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5067   match(Set dst (AddVB src1 src2));
5068   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
5069   ins_encode %{
5070     int vector_len = 2;
5071     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5072   %}
5073   ins_pipe( pipe_slow );
5074 %}
5075 
5076 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
5077   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5078   match(Set dst (AddVB src (LoadVector mem)));
5079   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
5080   ins_encode %{
5081     int vector_len = 2;
5082     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5083   %}
5084   ins_pipe( pipe_slow );
5085 %}
5086 
5087 // Shorts/Chars vector add
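// AddVS is used for both short and char elements: the packed 16-bit add
// (paddw/vpaddw) produces the same bit pattern regardless of signedness.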
5088 instruct vadd2S(vecS dst, vecS src) %{
5089   predicate(n->as_Vector()->length() == 2);
5090   match(Set dst (AddVS dst src));
5091   format %{ "paddw   $dst,$src\t! add packed2S" %}
5092   ins_encode %{
5093     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5094   %}
5095   ins_pipe( pipe_slow );
5096 %}
5097 
5098 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
5099   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5100   match(Set dst (AddVS src1 src2));
5101   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
5102   ins_encode %{
5103     int vector_len = 0;
5104     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5105   %}
5106   ins_pipe( pipe_slow );
5107 %}
5108 
5109 instruct vadd4S(vecD dst, vecD src) %{
5110   predicate(n->as_Vector()->length() == 4);
5111   match(Set dst (AddVS dst src));
5112   format %{ "paddw   $dst,$src\t! add packed4S" %}
5113   ins_encode %{
5114     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5115   %}
5116   ins_pipe( pipe_slow );
5117 %}
5118 
5119 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
5120   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5121   match(Set dst (AddVS src1 src2));
5122   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
5123   ins_encode %{
5124     int vector_len = 0;
5125     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5126   %}
5127   ins_pipe( pipe_slow );
5128 %}
5129 
5130 instruct vadd8S(vecX dst, vecX src) %{
5131   predicate(n->as_Vector()->length() == 8);
5132   match(Set dst (AddVS dst src));
5133   format %{ "paddw   $dst,$src\t! add packed8S" %}
5134   ins_encode %{
5135     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5136   %}
5137   ins_pipe( pipe_slow );
5138 %}
5139 
5140 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
5141   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5142   match(Set dst (AddVS src1 src2));
5143   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
5144   ins_encode %{
5145     int vector_len = 0;
5146     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5147   %}
5148   ins_pipe( pipe_slow );
5149 %}
5150 
5151 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
5152   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5153   match(Set dst (AddVS src (LoadVector mem)));
5154   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
5155   ins_encode %{
5156     int vector_len = 0;
5157     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5158   %}
5159   ins_pipe( pipe_slow );
5160 %}
5161 
5162 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
5163   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5164   match(Set dst (AddVS src1 src2));
5165   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
5166   ins_encode %{
5167     int vector_len = 1;
5168     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5169   %}
5170   ins_pipe( pipe_slow );
5171 %}
5172 
5173 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
5174   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5175   match(Set dst (AddVS src (LoadVector mem)));
5176   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
5177   ins_encode %{
5178     int vector_len = 1;
5179     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5180   %}
5181   ins_pipe( pipe_slow );
5182 %}
5183 
5184 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
5185   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5186   match(Set dst (AddVS src1 src2));
5187   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
5188   ins_encode %{
5189     int vector_len = 2;
5190     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5191   %}
5192   ins_pipe( pipe_slow );
5193 %}
5194 
5195 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
5196   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5197   match(Set dst (AddVS src (LoadVector mem)));
5198   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
5199   ins_encode %{
5200     int vector_len = 2;
5201     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5202   %}
5203   ins_pipe( pipe_slow );
5204 %}
5205 
5206 // Integers vector add
5207 instruct vadd2I(vecD dst, vecD src) %{
5208   predicate(n->as_Vector()->length() == 2);
5209   match(Set dst (AddVI dst src));
5210   format %{ "paddd   $dst,$src\t! add packed2I" %}
5211   ins_encode %{
5212     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5213   %}
5214   ins_pipe( pipe_slow );
5215 %}
5216 
5217 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
5218   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5219   match(Set dst (AddVI src1 src2));
5220   format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
5221   ins_encode %{
5222     int vector_len = 0;
5223     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5224   %}
5225   ins_pipe( pipe_slow );
5226 %}
5227 
5228 instruct vadd4I(vecX dst, vecX src) %{
5229   predicate(n->as_Vector()->length() == 4);
5230   match(Set dst (AddVI dst src));
5231   format %{ "paddd   $dst,$src\t! add packed4I" %}
5232   ins_encode %{
5233     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5234   %}
5235   ins_pipe( pipe_slow );
5236 %}
5237 
5238 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
5239   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5240   match(Set dst (AddVI src1 src2));
5241   format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
5242   ins_encode %{
5243     int vector_len = 0;
5244     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5245   %}
5246   ins_pipe( pipe_slow );
5247 %}
5248 
5249 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
5250   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5251   match(Set dst (AddVI src (LoadVector mem)));
5252   format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
5253   ins_encode %{
5254     int vector_len = 0;
5255     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5256   %}
5257   ins_pipe( pipe_slow );
5258 %}
5259 
5260 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
5261   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5262   match(Set dst (AddVI src1 src2));
5263   format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
5264   ins_encode %{
5265     int vector_len = 1;
5266     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5267   %}
5268   ins_pipe( pipe_slow );
5269 %}
5270 
5271 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
5272   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5273   match(Set dst (AddVI src (LoadVector mem)));
5274   format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
5275   ins_encode %{
5276     int vector_len = 1;
5277     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5278   %}
5279   ins_pipe( pipe_slow );
5280 %}
5281 
5282 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
5283   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5284   match(Set dst (AddVI src1 src2));
5285   format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
5286   ins_encode %{
5287     int vector_len = 2;
5288     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5289   %}
5290   ins_pipe( pipe_slow );
5291 %}
5292 
5293 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
5294   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5295   match(Set dst (AddVI src (LoadVector mem)));
5296   format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
5297   ins_encode %{
5298     int vector_len = 2;
5299     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5300   %}
5301   ins_pipe( pipe_slow );
5302 %}
5303 
5304 // Longs vector add
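// paddq/vpaddq add packed 64-bit lanes; paddq is SSE2, so the 128-bit rule
// needs no CPU-feature predicate beyond the vector length check.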
5305 instruct vadd2L(vecX dst, vecX src) %{
5306   predicate(n->as_Vector()->length() == 2);
5307   match(Set dst (AddVL dst src));
5308   format %{ "paddq   $dst,$src\t! add packed2L" %}
5309   ins_encode %{
5310     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
5311   %}
5312   ins_pipe( pipe_slow );
5313 %}
5314 
5315 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
5316   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5317   match(Set dst (AddVL src1 src2));
5318   format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
5319   ins_encode %{
5320     int vector_len = 0;
5321     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5322   %}
5323   ins_pipe( pipe_slow );
5324 %}
5325 
5326 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
5327   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5328   match(Set dst (AddVL src (LoadVector mem)));
5329   format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
5330   ins_encode %{
5331     int vector_len = 0;
5332     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5333   %}
5334   ins_pipe( pipe_slow );
5335 %}
5336 
5337 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
5338   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5339   match(Set dst (AddVL src1 src2));
5340   format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
5341   ins_encode %{
5342     int vector_len = 1;
5343     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5344   %}
5345   ins_pipe( pipe_slow );
5346 %}
5347 
5348 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
5349   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5350   match(Set dst (AddVL src (LoadVector mem)));
5351   format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
5352   ins_encode %{
5353     int vector_len = 1;
5354     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5355   %}
5356   ins_pipe( pipe_slow );
5357 %}
5358 
5359 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
5360   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5361   match(Set dst (AddVL src1 src2));
5362   format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
5363   ins_encode %{
5364     int vector_len = 2;
5365     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5366   %}
5367   ins_pipe( pipe_slow );
5368 %}
5369 
5370 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
5371   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5372   match(Set dst (AddVL src (LoadVector mem)));
5373   format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
5374   ins_encode %{
5375     int vector_len = 2;
5376     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5377   %}
5378   ins_pipe( pipe_slow );
5379 %}
5380 
5381 // Floats vector add
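// Unlike the integer forms, the 256-bit FP forms only require UseAVX > 0:
// vaddps/vaddpd on YMM registers are part of the original AVX extension,
// while 256-bit integer arithmetic first appeared with AVX2.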
5382 instruct vadd2F(vecD dst, vecD src) %{
5383   predicate(n->as_Vector()->length() == 2);
5384   match(Set dst (AddVF dst src));
5385   format %{ "addps   $dst,$src\t! add packed2F" %}
5386   ins_encode %{
5387     __ addps($dst$$XMMRegister, $src$$XMMRegister);
5388   %}
5389   ins_pipe( pipe_slow );
5390 %}
5391 
5392 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
5393   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5394   match(Set dst (AddVF src1 src2));
5395   format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
5396   ins_encode %{
5397     int vector_len = 0;
5398     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5399   %}
5400   ins_pipe( pipe_slow );
5401 %}
5402 
5403 instruct vadd4F(vecX dst, vecX src) %{
5404   predicate(n->as_Vector()->length() == 4);
5405   match(Set dst (AddVF dst src));
5406   format %{ "addps   $dst,$src\t! add packed4F" %}
5407   ins_encode %{
5408     __ addps($dst$$XMMRegister, $src$$XMMRegister);
5409   %}
5410   ins_pipe( pipe_slow );
5411 %}
5412 
5413 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
5414   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5415   match(Set dst (AddVF src1 src2));
5416   format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
5417   ins_encode %{
5418     int vector_len = 0;
5419     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5420   %}
5421   ins_pipe( pipe_slow );
5422 %}
5423 
5424 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
5425   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5426   match(Set dst (AddVF src (LoadVector mem)));
5427   format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
5428   ins_encode %{
5429     int vector_len = 0;
5430     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5431   %}
5432   ins_pipe( pipe_slow );
5433 %}
5434 
5435 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
5436   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5437   match(Set dst (AddVF src1 src2));
5438   format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
5439   ins_encode %{
5440     int vector_len = 1;
5441     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5442   %}
5443   ins_pipe( pipe_slow );
5444 %}
5445 
5446 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
5447   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5448   match(Set dst (AddVF src (LoadVector mem)));
5449   format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
5450   ins_encode %{
5451     int vector_len = 1;
5452     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5453   %}
5454   ins_pipe( pipe_slow );
5455 %}
5456 
5457 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
5458   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5459   match(Set dst (AddVF src1 src2));
5460   format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
5461   ins_encode %{
5462     int vector_len = 2;
5463     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5464   %}
5465   ins_pipe( pipe_slow );
5466 %}
5467 
5468 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
5469   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5470   match(Set dst (AddVF src (LoadVector mem)));
5471   format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
5472   ins_encode %{
5473     int vector_len = 2;
5474     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5475   %}
5476   ins_pipe( pipe_slow );
5477 %}
5478 
5479 // Doubles vector add
5480 instruct vadd2D(vecX dst, vecX src) %{
5481   predicate(n->as_Vector()->length() == 2);
5482   match(Set dst (AddVD dst src));
5483   format %{ "addpd   $dst,$src\t! add packed2D" %}
5484   ins_encode %{
5485     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
5486   %}
5487   ins_pipe( pipe_slow );
5488 %}
5489 
5490 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
5491   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5492   match(Set dst (AddVD src1 src2));
5493   format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
5494   ins_encode %{
5495     int vector_len = 0;
5496     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5497   %}
5498   ins_pipe( pipe_slow );
5499 %}
5500 
5501 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
5502   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5503   match(Set dst (AddVD src (LoadVector mem)));
5504   format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
5505   ins_encode %{
5506     int vector_len = 0;
5507     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5508   %}
5509   ins_pipe( pipe_slow );
5510 %}
5511 
5512 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
5513   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5514   match(Set dst (AddVD src1 src2));
5515   format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
5516   ins_encode %{
5517     int vector_len = 1;
5518     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5519   %}
5520   ins_pipe( pipe_slow );
5521 %}
5522 
5523 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
5524   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5525   match(Set dst (AddVD src (LoadVector mem)));
5526   format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
5527   ins_encode %{
5528     int vector_len = 1;
5529     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5530   %}
5531   ins_pipe( pipe_slow );
5532 %}
5533 
5534 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
5535   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5536   match(Set dst (AddVD src1 src2));
5537   format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
5538   ins_encode %{
5539     int vector_len = 2;
5540     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5541   %}
5542   ins_pipe( pipe_slow );
5543 %}
5544 
5545 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
5546   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5547   match(Set dst (AddVD src (LoadVector mem)));
5548   format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
5549   ins_encode %{
5550     int vector_len = 2;
5551     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5552   %}
5553   ins_pipe( pipe_slow );
5554 %}
5555 
5556 // --------------------------------- SUB --------------------------------------
5557 
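// The SUB rules mirror the ADD rules above, substituting psub*/vpsub* for
// padd*/vpadd* and using the same vector-length and UseAVX predicates.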
5558 // Bytes vector sub
5559 instruct vsub4B(vecS dst, vecS src) %{
5560   predicate(n->as_Vector()->length() == 4);
5561   match(Set dst (SubVB dst src));
5562   format %{ "psubb   $dst,$src\t! sub packed4B" %}
5563   ins_encode %{
5564     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5565   %}
5566   ins_pipe( pipe_slow );
5567 %}
5568 
5569 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
5570   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5571   match(Set dst (SubVB src1 src2));
5572   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
5573   ins_encode %{
5574     int vector_len = 0;
5575     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5576   %}
5577   ins_pipe( pipe_slow );
5578 %}
5579 
5580 instruct vsub8B(vecD dst, vecD src) %{
5581   predicate(n->as_Vector()->length() == 8);
5582   match(Set dst (SubVB dst src));
5583   format %{ "psubb   $dst,$src\t! sub packed8B" %}
5584   ins_encode %{
5585     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5586   %}
5587   ins_pipe( pipe_slow );
5588 %}
5589 
5590 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
5591   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5592   match(Set dst (SubVB src1 src2));
5593   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
5594   ins_encode %{
5595     int vector_len = 0;
5596     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5597   %}
5598   ins_pipe( pipe_slow );
5599 %}
5600 
5601 instruct vsub16B(vecX dst, vecX src) %{
5602   predicate(n->as_Vector()->length() == 16);
5603   match(Set dst (SubVB dst src));
5604   format %{ "psubb   $dst,$src\t! sub packed16B" %}
5605   ins_encode %{
5606     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5607   %}
5608   ins_pipe( pipe_slow );
5609 %}
5610 
5611 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
5612   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5613   match(Set dst (SubVB src1 src2));
5614   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
5615   ins_encode %{
5616     int vector_len = 0;
5617     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5618   %}
5619   ins_pipe( pipe_slow );
5620 %}
5621 
5622 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
5623   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5624   match(Set dst (SubVB src (LoadVector mem)));
5625   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
5626   ins_encode %{
5627     int vector_len = 0;
5628     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5629   %}
5630   ins_pipe( pipe_slow );
5631 %}
5632 
5633 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
5634   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5635   match(Set dst (SubVB src1 src2));
5636   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
5637   ins_encode %{
5638     int vector_len = 1;
5639     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5640   %}
5641   ins_pipe( pipe_slow );
5642 %}
5643 
5644 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
5645   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5646   match(Set dst (SubVB src (LoadVector mem)));
5647   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
5648   ins_encode %{
5649     int vector_len = 1;
5650     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5651   %}
5652   ins_pipe( pipe_slow );
5653 %}
5654 
5655 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
5656   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5657   match(Set dst (SubVB src1 src2));
5658   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
5659   ins_encode %{
5660     int vector_len = 2;
5661     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5662   %}
5663   ins_pipe( pipe_slow );
5664 %}
5665 
5666 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
5667   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5668   match(Set dst (SubVB src (LoadVector mem)));
5669   format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
5670   ins_encode %{
5671     int vector_len = 2;
5672     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5673   %}
5674   ins_pipe( pipe_slow );
5675 %}
5676 
5677 // Shorts/Chars vector sub
5678 instruct vsub2S(vecS dst, vecS src) %{
5679   predicate(n->as_Vector()->length() == 2);
5680   match(Set dst (SubVS dst src));
5681   format %{ "psubw   $dst,$src\t! sub packed2S" %}
5682   ins_encode %{
5683     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5684   %}
5685   ins_pipe( pipe_slow );
5686 %}
5687 
5688 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
5689   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5690   match(Set dst (SubVS src1 src2));
5691   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
5692   ins_encode %{
5693     int vector_len = 0;
5694     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5695   %}
5696   ins_pipe( pipe_slow );
5697 %}
5698 
5699 instruct vsub4S(vecD dst, vecD src) %{
5700   predicate(n->as_Vector()->length() == 4);
5701   match(Set dst (SubVS dst src));
5702   format %{ "psubw   $dst,$src\t! sub packed4S" %}
5703   ins_encode %{
5704     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5705   %}
5706   ins_pipe( pipe_slow );
5707 %}
5708 
5709 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
5710   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5711   match(Set dst (SubVS src1 src2));
5712   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
5713   ins_encode %{
5714     int vector_len = 0;
5715     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5716   %}
5717   ins_pipe( pipe_slow );
5718 %}
5719 
5720 instruct vsub8S(vecX dst, vecX src) %{
5721   predicate(n->as_Vector()->length() == 8);
5722   match(Set dst (SubVS dst src));
5723   format %{ "psubw   $dst,$src\t! sub packed8S" %}
5724   ins_encode %{
5725     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5726   %}
5727   ins_pipe( pipe_slow );
5728 %}
5729 
5730 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
5731   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5732   match(Set dst (SubVS src1 src2));
5733   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
5734   ins_encode %{
5735     int vector_len = 0;
5736     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5737   %}
5738   ins_pipe( pipe_slow );
5739 %}
5740 
5741 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
5742   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5743   match(Set dst (SubVS src (LoadVector mem)));
5744   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
5745   ins_encode %{
5746     int vector_len = 0;
5747     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5748   %}
5749   ins_pipe( pipe_slow );
5750 %}
5751 
5752 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
5753   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5754   match(Set dst (SubVS src1 src2));
5755   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
5756   ins_encode %{
5757     int vector_len = 1;
5758     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5759   %}
5760   ins_pipe( pipe_slow );
5761 %}
5762 
5763 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
5764   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5765   match(Set dst (SubVS src (LoadVector mem)));
5766   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
5767   ins_encode %{
5768     int vector_len = 1;
5769     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5770   %}
5771   ins_pipe( pipe_slow );
5772 %}
5773 
5774 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
5775   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5776   match(Set dst (SubVS src1 src2));
5777   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
5778   ins_encode %{
5779     int vector_len = 2;
5780     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5781   %}
5782   ins_pipe( pipe_slow );
5783 %}
5784 
5785 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
5786   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5787   match(Set dst (SubVS src (LoadVector mem)));
5788   format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
5789   ins_encode %{
5790     int vector_len = 2;
5791     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5792   %}
5793   ins_pipe( pipe_slow );
5794 %}
5795 
5796 // Integers vector sub
5797 instruct vsub2I(vecD dst, vecD src) %{
5798   predicate(n->as_Vector()->length() == 2);
5799   match(Set dst (SubVI dst src));
5800   format %{ "psubd   $dst,$src\t! sub packed2I" %}
5801   ins_encode %{
5802     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
5803   %}
5804   ins_pipe( pipe_slow );
5805 %}
5806 
5807 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
5808   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5809   match(Set dst (SubVI src1 src2));
5810   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
5811   ins_encode %{
5812     int vector_len = 0;
5813     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5814   %}
5815   ins_pipe( pipe_slow );
5816 %}
5817 
5818 instruct vsub4I(vecX dst, vecX src) %{
5819   predicate(n->as_Vector()->length() == 4);
5820   match(Set dst (SubVI dst src));
5821   format %{ "psubd   $dst,$src\t! sub packed4I" %}
5822   ins_encode %{
5823     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
5824   %}
5825   ins_pipe( pipe_slow );
5826 %}
5827 
5828 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
5829   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5830   match(Set dst (SubVI src1 src2));
5831   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
5832   ins_encode %{
5833     int vector_len = 0;
5834     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5835   %}
5836   ins_pipe( pipe_slow );
5837 %}
5838 
5839 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
5840   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5841   match(Set dst (SubVI src (LoadVector mem)));
5842   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
5843   ins_encode %{
5844     int vector_len = 0;
5845     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5846   %}
5847   ins_pipe( pipe_slow );
5848 %}
5849 
5850 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
5851   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5852   match(Set dst (SubVI src1 src2));
5853   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
5854   ins_encode %{
5855     int vector_len = 1;
5856     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5857   %}
5858   ins_pipe( pipe_slow );
5859 %}
5860 
5861 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
5862   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5863   match(Set dst (SubVI src (LoadVector mem)));
5864   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
5865   ins_encode %{
5866     int vector_len = 1;
5867     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5868   %}
5869   ins_pipe( pipe_slow );
5870 %}
5871 
5872 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
5873   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5874   match(Set dst (SubVI src1 src2));
5875   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed16I" %}
5876   ins_encode %{
5877     int vector_len = 2;
5878     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5879   %}
5880   ins_pipe( pipe_slow );
5881 %}
5882 
5883 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
5884   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5885   match(Set dst (SubVI src (LoadVector mem)));
5886   format %{ "vpsubd  $dst,$src,$mem\t! sub packed16I" %}
5887   ins_encode %{
5888     int vector_len = 2;
5889     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5890   %}
5891   ins_pipe( pipe_slow );
5892 %}
5893 
5894 // Longs vector sub
5895 instruct vsub2L(vecX dst, vecX src) %{
5896   predicate(n->as_Vector()->length() == 2);
5897   match(Set dst (SubVL dst src));
5898   format %{ "psubq   $dst,$src\t! sub packed2L" %}
5899   ins_encode %{
5900     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
5901   %}
5902   ins_pipe( pipe_slow );
5903 %}
5904 
5905 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
5906   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5907   match(Set dst (SubVL src1 src2));
5908   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
5909   ins_encode %{
5910     int vector_len = 0;
5911     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5912   %}
5913   ins_pipe( pipe_slow );
5914 %}
5915 
5916 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
5917   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5918   match(Set dst (SubVL src (LoadVector mem)));
5919   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
5920   ins_encode %{
5921     int vector_len = 0;
5922     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5923   %}
5924   ins_pipe( pipe_slow );
5925 %}
5926 
5927 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
5928   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5929   match(Set dst (SubVL src1 src2));
5930   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
5931   ins_encode %{
5932     int vector_len = 1;
5933     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5934   %}
5935   ins_pipe( pipe_slow );
5936 %}
5937 
5938 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
5939   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5940   match(Set dst (SubVL src (LoadVector mem)));
5941   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
5942   ins_encode %{
5943     int vector_len = 1;
5944     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5945   %}
5946   ins_pipe( pipe_slow );
5947 %}
5948 
5949 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
5950   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5951   match(Set dst (SubVL src1 src2));
5952   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed8L" %}
5953   ins_encode %{
5954     int vector_len = 2;
5955     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5956   %}
5957   ins_pipe( pipe_slow );
5958 %}
5959 
5960 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
5961   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5962   match(Set dst (SubVL src (LoadVector mem)));
5963   format %{ "vpsubq  $dst,$src,$mem\t! sub packed8L" %}
5964   ins_encode %{
5965     int vector_len = 2;
5966     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5967   %}
5968   ins_pipe( pipe_slow );
5969 %}
5970 
5971 // Floats vector sub
5972 instruct vsub2F(vecD dst, vecD src) %{
5973   predicate(n->as_Vector()->length() == 2);
5974   match(Set dst (SubVF dst src));
5975   format %{ "subps   $dst,$src\t! sub packed2F" %}
5976   ins_encode %{
5977     __ subps($dst$$XMMRegister, $src$$XMMRegister);
5978   %}
5979   ins_pipe( pipe_slow );
5980 %}
5981 
5982 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
5983   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5984   match(Set dst (SubVF src1 src2));
5985   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
5986   ins_encode %{
5987     int vector_len = 0;
5988     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5989   %}
5990   ins_pipe( pipe_slow );
5991 %}
5992 
5993 instruct vsub4F(vecX dst, vecX src) %{
5994   predicate(n->as_Vector()->length() == 4);
5995   match(Set dst (SubVF dst src));
5996   format %{ "subps   $dst,$src\t! sub packed4F" %}
5997   ins_encode %{
5998     __ subps($dst$$XMMRegister, $src$$XMMRegister);
5999   %}
6000   ins_pipe( pipe_slow );
6001 %}
6002 
6003 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
6004   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6005   match(Set dst (SubVF src1 src2));
6006   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
6007   ins_encode %{
6008     int vector_len = 0;
6009     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6010   %}
6011   ins_pipe( pipe_slow );
6012 %}
6013 
6014 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
6015   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6016   match(Set dst (SubVF src (LoadVector mem)));
6017   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
6018   ins_encode %{
6019     int vector_len = 0;
6020     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6021   %}
6022   ins_pipe( pipe_slow );
6023 %}
6024 
6025 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
6026   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6027   match(Set dst (SubVF src1 src2));
6028   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
6029   ins_encode %{
6030     int vector_len = 1;
6031     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6032   %}
6033   ins_pipe( pipe_slow );
6034 %}
6035 
6036 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
6037   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6038   match(Set dst (SubVF src (LoadVector mem)));
6039   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
6040   ins_encode %{
6041     int vector_len = 1;
6042     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6043   %}
6044   ins_pipe( pipe_slow );
6045 %}
6046 
6047 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6048   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6049   match(Set dst (SubVF src1 src2));
6050   format %{ "vsubps  $dst,$src1,$src2\t! sub packed16F" %}
6051   ins_encode %{
6052     int vector_len = 2;
6053     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6054   %}
6055   ins_pipe( pipe_slow );
6056 %}
6057 
6058 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
6059   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6060   match(Set dst (SubVF src (LoadVector mem)));
6061   format %{ "vsubps  $dst,$src,$mem\t! sub packed16F" %}
6062   ins_encode %{
6063     int vector_len = 2;
6064     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6065   %}
6066   ins_pipe( pipe_slow );
6067 %}
6068 
6069 // Doubles vector sub
6070 instruct vsub2D(vecX dst, vecX src) %{
6071   predicate(n->as_Vector()->length() == 2);
6072   match(Set dst (SubVD dst src));
6073   format %{ "subpd   $dst,$src\t! sub packed2D" %}
6074   ins_encode %{
6075     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
6076   %}
6077   ins_pipe( pipe_slow );
6078 %}
6079 
6080 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
6081   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6082   match(Set dst (SubVD src1 src2));
6083   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
6084   ins_encode %{
6085     int vector_len = 0;
6086     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6087   %}
6088   ins_pipe( pipe_slow );
6089 %}
6090 
6091 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
6092   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6093   match(Set dst (SubVD src (LoadVector mem)));
6094   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
6095   ins_encode %{
6096     int vector_len = 0;
6097     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6098   %}
6099   ins_pipe( pipe_slow );
6100 %}
6101 
6102 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
6103   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6104   match(Set dst (SubVD src1 src2));
6105   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
6106   ins_encode %{
6107     int vector_len = 1;
6108     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6109   %}
6110   ins_pipe( pipe_slow );
6111 %}
6112 
6113 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
6114   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6115   match(Set dst (SubVD src (LoadVector mem)));
6116   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
6117   ins_encode %{
6118     int vector_len = 1;
6119     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6120   %}
6121   ins_pipe( pipe_slow );
6122 %}
6123 
6124 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6125   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6126   match(Set dst (SubVD src1 src2));
6127   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed8D" %}
6128   ins_encode %{
6129     int vector_len = 2;
6130     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6131   %}
6132   ins_pipe( pipe_slow );
6133 %}
6134 
6135 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
6136   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6137   match(Set dst (SubVD src (LoadVector mem)));
6138   format %{ "vsubpd  $dst,$src,$mem\t! sub packed8D" %}
6139   ins_encode %{
6140     int vector_len = 2;
6141     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6142   %}
6143   ins_pipe( pipe_slow );
6144 %}
6145 
6146 // --------------------------------- MUL --------------------------------------
6147 
6148 // Shorts/Chars vector mul
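// pmullw/vpmullw keep the low 16 bits of each 32-bit product, which matches
// the truncating semantics expected for short/char multiplication.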
6149 instruct vmul2S(vecS dst, vecS src) %{
6150   predicate(n->as_Vector()->length() == 2);
6151   match(Set dst (MulVS dst src));
6152   format %{ "pmullw  $dst,$src\t! mul packed2S" %}
6153   ins_encode %{
6154     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6155   %}
6156   ins_pipe( pipe_slow );
6157 %}
6158 
6159 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
6160   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6161   match(Set dst (MulVS src1 src2));
6162   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
6163   ins_encode %{
6164     int vector_len = 0;
6165     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6166   %}
6167   ins_pipe( pipe_slow );
6168 %}
6169 
6170 instruct vmul4S(vecD dst, vecD src) %{
6171   predicate(n->as_Vector()->length() == 4);
6172   match(Set dst (MulVS dst src));
6173   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
6174   ins_encode %{
6175     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6176   %}
6177   ins_pipe( pipe_slow );
6178 %}
6179 
6180 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
6181   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6182   match(Set dst (MulVS src1 src2));
6183   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
6184   ins_encode %{
6185     int vector_len = 0;
6186     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6187   %}
6188   ins_pipe( pipe_slow );
6189 %}
6190 
6191 instruct vmul8S(vecX dst, vecX src) %{
6192   predicate(n->as_Vector()->length() == 8);
6193   match(Set dst (MulVS dst src));
6194   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
6195   ins_encode %{
6196     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6197   %}
6198   ins_pipe( pipe_slow );
6199 %}
6200 
6201 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
6202   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6203   match(Set dst (MulVS src1 src2));
6204   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
6205   ins_encode %{
6206     int vector_len = 0;
6207     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6208   %}
6209   ins_pipe( pipe_slow );
6210 %}
6211 
6212 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
6213   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6214   match(Set dst (MulVS src (LoadVector mem)));
6215   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
6216   ins_encode %{
6217     int vector_len = 0;
6218     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6219   %}
6220   ins_pipe( pipe_slow );
6221 %}
6222 
6223 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
6224   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6225   match(Set dst (MulVS src1 src2));
6226   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
6227   ins_encode %{
6228     int vector_len = 1;
6229     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6230   %}
6231   ins_pipe( pipe_slow );
6232 %}
6233 
6234 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
6235   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6236   match(Set dst (MulVS src (LoadVector mem)));
6237   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
6238   ins_encode %{
6239     int vector_len = 1;
6240     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6241   %}
6242   ins_pipe( pipe_slow );
6243 %}
6244 
6245 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
6246   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6247   match(Set dst (MulVS src1 src2));
6248   format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
6249   ins_encode %{
6250     int vector_len = 2;
6251     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6252   %}
6253   ins_pipe( pipe_slow );
6254 %}
6255 
6256 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
6257   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6258   match(Set dst (MulVS src (LoadVector mem)));
6259   format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
6260   ins_encode %{
6261     int vector_len = 2;
6262     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6263   %}
6264   ins_pipe( pipe_slow );
6265 %}
6266 
6267 // Integers vector mul (sse4_1)
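// pmulld (packed 32-bit multiply, low result) was introduced with SSE4.1,
// hence the UseSSE > 3 predicate on the non-AVX forms.  There is no packed
// 64-bit multiply before AVX-512DQ, so the MulVL rules below use vpmullq
// and additionally check VM_Version::supports_avx512dq().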
6268 instruct vmul2I(vecD dst, vecD src) %{
6269   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
6270   match(Set dst (MulVI dst src));
6271   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
6272   ins_encode %{
6273     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6274   %}
6275   ins_pipe( pipe_slow );
6276 %}
6277 
6278 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
6279   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6280   match(Set dst (MulVI src1 src2));
6281   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
6282   ins_encode %{
6283     int vector_len = 0;
6284     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6285   %}
6286   ins_pipe( pipe_slow );
6287 %}
6288 
6289 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
6290   predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
6291   match(Set dst (MulVL src1 src2));
6292   format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
6293   ins_encode %{
6294     int vector_len = 0;
6295     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6296   %}
6297   ins_pipe( pipe_slow );
6298 %}
6299 
6300 instruct vmul4I(vecX dst, vecX src) %{
6301   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
6302   match(Set dst (MulVI dst src));
6303   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
6304   ins_encode %{
6305     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6306   %}
6307   ins_pipe( pipe_slow );
6308 %}
6309 
6310 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
6311   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6312   match(Set dst (MulVI src1 src2));
6313   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
6314   ins_encode %{
6315     int vector_len = 0;
6316     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6317   %}
6318   ins_pipe( pipe_slow );
6319 %}
6320 
6321 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
6322   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6323   match(Set dst (MulVI src (LoadVector mem)));
6324   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
6325   ins_encode %{
6326     int vector_len = 0;
6327     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6328   %}
6329   ins_pipe( pipe_slow );
6330 %}
6331 
6332 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
6333   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
6334   match(Set dst (MulVL src1 src2));
6335   format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
6336   ins_encode %{
6337     int vector_len = 1;
6338     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6339   %}
6340   ins_pipe( pipe_slow );
6341 %}
6342 
6343 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
6344   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
6345   match(Set dst (MulVL src (LoadVector mem)));
6346   format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
6347   ins_encode %{
6348     int vector_len = 1;
6349     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6350   %}
6351   ins_pipe( pipe_slow );
6352 %}
6353 
6354 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
6355   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6356   match(Set dst (MulVI src1 src2));
6357   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
6358   ins_encode %{
6359     int vector_len = 1;
6360     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6361   %}
6362   ins_pipe( pipe_slow );
6363 %}
6364 
6365 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
6366   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
6367   match(Set dst (MulVL src1 src2));
6368   format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
6369   ins_encode %{
6370     int vector_len = 2;
6371     __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6372   %}
6373   ins_pipe( pipe_slow );
6374 %}
6375 
6376 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
6377   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6378   match(Set dst (MulVI src1 src2));
6379   format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
6380   ins_encode %{
6381     int vector_len = 2;
6382     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6383   %}
6384   ins_pipe( pipe_slow );
6385 %}
6386 
6387 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
6388   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6389   match(Set dst (MulVI src (LoadVector mem)));
6390   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
6391   ins_encode %{
6392     int vector_len = 1;
6393     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6394   %}
6395   ins_pipe( pipe_slow );
6396 %}
6397 
6398 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
6399   predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
6400   match(Set dst (MulVL src (LoadVector mem)));
6401   format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
6402   ins_encode %{
6403     int vector_len = 2;
6404     __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6405   %}
6406   ins_pipe( pipe_slow );
6407 %}
6408 
6409 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
6410   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6411   match(Set dst (MulVI src (LoadVector mem)));
6412   format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
6413   ins_encode %{
6414     int vector_len = 2;
6415     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6416   %}
6417   ins_pipe( pipe_slow );
6418 %}
6419 
6420 // Floats vector mul
6421 instruct vmul2F(vecD dst, vecD src) %{
6422   predicate(n->as_Vector()->length() == 2);
6423   match(Set dst (MulVF dst src));
6424   format %{ "mulps   $dst,$src\t! mul packed2F" %}
6425   ins_encode %{
6426     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6427   %}
6428   ins_pipe( pipe_slow );
6429 %}
6430 
6431 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
6432   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6433   match(Set dst (MulVF src1 src2));
6434   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
6435   ins_encode %{
6436     int vector_len = 0;
6437     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6438   %}
6439   ins_pipe( pipe_slow );
6440 %}
6441 
6442 instruct vmul4F(vecX dst, vecX src) %{
6443   predicate(n->as_Vector()->length() == 4);
6444   match(Set dst (MulVF dst src));
6445   format %{ "mulps   $dst,$src\t! mul packed4F" %}
6446   ins_encode %{
6447     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6448   %}
6449   ins_pipe( pipe_slow );
6450 %}
6451 
6452 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
6453   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6454   match(Set dst (MulVF src1 src2));
6455   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
6456   ins_encode %{
6457     int vector_len = 0;
6458     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6459   %}
6460   ins_pipe( pipe_slow );
6461 %}
6462 
6463 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
6464   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6465   match(Set dst (MulVF src (LoadVector mem)));
6466   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
6467   ins_encode %{
6468     int vector_len = 0;
6469     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6470   %}
6471   ins_pipe( pipe_slow );
6472 %}
6473 
6474 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
6475   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6476   match(Set dst (MulVF src1 src2));
6477   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
6478   ins_encode %{
6479     int vector_len = 1;
6480     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6481   %}
6482   ins_pipe( pipe_slow );
6483 %}
6484 
6485 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
6486   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6487   match(Set dst (MulVF src (LoadVector mem)));
6488   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
6489   ins_encode %{
6490     int vector_len = 1;
6491     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6492   %}
6493   ins_pipe( pipe_slow );
6494 %}
6495 
6496 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6497   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6498   match(Set dst (MulVF src1 src2));
6499   format %{ "vmulps  $dst,$src1,$src2\t! mul packed16F" %}
6500   ins_encode %{
6501     int vector_len = 2;
6502     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6503   %}
6504   ins_pipe( pipe_slow );
6505 %}
6506 
6507 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
6508   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6509   match(Set dst (MulVF src (LoadVector mem)));
6510   format %{ "vmulps  $dst,$src,$mem\t! mul packed16F" %}
6511   ins_encode %{
6512     int vector_len = 2;
6513     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6514   %}
6515   ins_pipe( pipe_slow );
6516 %}
6517 
6518 // Doubles vector mul
6519 instruct vmul2D(vecX dst, vecX src) %{
6520   predicate(n->as_Vector()->length() == 2);
6521   match(Set dst (MulVD dst src));
6522   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
6523   ins_encode %{
6524     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
6525   %}
6526   ins_pipe( pipe_slow );
6527 %}
6528 
6529 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
6530   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6531   match(Set dst (MulVD src1 src2));
6532   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
6533   ins_encode %{
6534     int vector_len = 0;
6535     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6536   %}
6537   ins_pipe( pipe_slow );
6538 %}
6539 
6540 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
6541   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6542   match(Set dst (MulVD src (LoadVector mem)));
6543   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
6544   ins_encode %{
6545     int vector_len = 0;
6546     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6547   %}
6548   ins_pipe( pipe_slow );
6549 %}
6550 
6551 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
6552   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6553   match(Set dst (MulVD src1 src2));
6554   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
6555   ins_encode %{
6556     int vector_len = 1;
6557     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6558   %}
6559   ins_pipe( pipe_slow );
6560 %}
6561 
6562 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
6563   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6564   match(Set dst (MulVD src (LoadVector mem)));
6565   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
6566   ins_encode %{
6567     int vector_len = 1;
6568     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6569   %}
6570   ins_pipe( pipe_slow );
6571 %}
6572 
6573 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6574   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6575   match(Set dst (MulVD src1 src2));
6576   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed8D" %}
6577   ins_encode %{
6578     int vector_len = 2;
6579     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6580   %}
6581   ins_pipe( pipe_slow );
6582 %}
6583 
6584 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
6585   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6586   match(Set dst (MulVD src (LoadVector mem)));
6587   format %{ "vmulpd  $dst,$src,$mem\t! mul packed8D" %}
6588   ins_encode %{
6589     int vector_len = 2;
6590     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6591   %}
6592   ins_pipe( pipe_slow );
6593 %}
6594 
6595 // --------------------------------- DIV --------------------------------------
6596 
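// Only floating-point division is vectorized: x86 has no packed integer
// divide instruction, so only DivVF and DivVD are matched in this section.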
6597 // Floats vector div
6598 instruct vdiv2F(vecD dst, vecD src) %{
6599   predicate(n->as_Vector()->length() == 2);
6600   match(Set dst (DivVF dst src));
6601   format %{ "divps   $dst,$src\t! div packed2F" %}
6602   ins_encode %{
6603     __ divps($dst$$XMMRegister, $src$$XMMRegister);
6604   %}
6605   ins_pipe( pipe_slow );
6606 %}
6607 
6608 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
6609   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6610   match(Set dst (DivVF src1 src2));
6611   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
6612   ins_encode %{
6613     int vector_len = 0;
6614     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6615   %}
6616   ins_pipe( pipe_slow );
6617 %}
6618 
6619 instruct vdiv4F(vecX dst, vecX src) %{
6620   predicate(n->as_Vector()->length() == 4);
6621   match(Set dst (DivVF dst src));
6622   format %{ "divps   $dst,$src\t! div packed4F" %}
6623   ins_encode %{
6624     __ divps($dst$$XMMRegister, $src$$XMMRegister);
6625   %}
6626   ins_pipe( pipe_slow );
6627 %}
6628 
6629 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
6630   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6631   match(Set dst (DivVF src1 src2));
6632   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
6633   ins_encode %{
6634     int vector_len = 0;
6635     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6636   %}
6637   ins_pipe( pipe_slow );
6638 %}
6639 
6640 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
6641   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6642   match(Set dst (DivVF src (LoadVector mem)));
6643   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
6644   ins_encode %{
6645     int vector_len = 0;
6646     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6647   %}
6648   ins_pipe( pipe_slow );
6649 %}
6650 
6651 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
6652   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6653   match(Set dst (DivVF src1 src2));
6654   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
6655   ins_encode %{
6656     int vector_len = 1;
6657     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6658   %}
6659   ins_pipe( pipe_slow );
6660 %}
6661 
6662 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
6663   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6664   match(Set dst (DivVF src (LoadVector mem)));
6665   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
6666   ins_encode %{
6667     int vector_len = 1;
6668     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6669   %}
6670   ins_pipe( pipe_slow );
6671 %}
6672 
6673 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6674   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6675   match(Set dst (DivVF src1 src2));
6676   format %{ "vdivps  $dst,$src1,$src2\t! div packed16F" %}
6677   ins_encode %{
6678     int vector_len = 2;
6679     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6680   %}
6681   ins_pipe( pipe_slow );
6682 %}
6683 
6684 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
6685   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6686   match(Set dst (DivVF src (LoadVector mem)));
6687   format %{ "vdivps  $dst,$src,$mem\t! div packed16F" %}
6688   ins_encode %{
6689     int vector_len = 2;
6690     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6691   %}
6692   ins_pipe( pipe_slow );
6693 %}
6694 
6695 // Doubles vector div
6696 instruct vdiv2D(vecX dst, vecX src) %{
6697   predicate(n->as_Vector()->length() == 2);
6698   match(Set dst (DivVD dst src));
6699   format %{ "divpd   $dst,$src\t! div packed2D" %}
6700   ins_encode %{
6701     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
6702   %}
6703   ins_pipe( pipe_slow );
6704 %}
6705 
6706 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
6707   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6708   match(Set dst (DivVD src1 src2));
6709   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
6710   ins_encode %{
6711     int vector_len = 0;
6712     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6713   %}
6714   ins_pipe( pipe_slow );
6715 %}
6716 
6717 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
6718   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6719   match(Set dst (DivVD src (LoadVector mem)));
6720   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
6721   ins_encode %{
6722     int vector_len = 0;
6723     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6724   %}
6725   ins_pipe( pipe_slow );
6726 %}
6727 
6728 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
6729   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6730   match(Set dst (DivVD src1 src2));
6731   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
6732   ins_encode %{
6733     int vector_len = 1;
6734     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6735   %}
6736   ins_pipe( pipe_slow );
6737 %}
6738 
6739 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
6740   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6741   match(Set dst (DivVD src (LoadVector mem)));
6742   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
6743   ins_encode %{
6744     int vector_len = 1;
6745     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6746   %}
6747   ins_pipe( pipe_slow );
6748 %}
6749 
6750 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6751   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6752   match(Set dst (DivVD src1 src2));
6753   format %{ "vdivpd  $dst,$src1,$src2\t! div packed8D" %}
6754   ins_encode %{
6755     int vector_len = 2;
6756     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6757   %}
6758   ins_pipe( pipe_slow );
6759 %}
6760 
6761 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
6762   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6763   match(Set dst (DivVD src (LoadVector mem)));
6764   format %{ "vdivpd  $dst,$src,$mem\t! div packed8D" %}
6765   ins_encode %{
6766     int vector_len = 2;
6767     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6768   %}
6769   ins_pipe( pipe_slow );
6770 %}
6771 
6772 // ------------------------------ Shift ---------------------------------------
6773 
6774 // Left and right shift count vectors are the same on x86
6775 // (only the low bits of the xmm register are used for the count).
6776 instruct vshiftcnt(vecS dst, rRegI cnt) %{
6777   match(Set dst (LShiftCntV cnt));
6778   match(Set dst (RShiftCntV cnt));
6779   format %{ "movd    $dst,$cnt\t! load shift count" %}
6780   ins_encode %{
6781     __ movdl($dst$$XMMRegister, $cnt$$Register);
6782   %}
6783   ins_pipe( pipe_slow );
6784 %}
6785 
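// Editorial note (not in x86.ad): a variable shift count reaches the vector
// shifts through the LShiftCntV/RShiftCntV form above (movd of the count into
// an xmm register), while a constant count matches the *_imm forms directly.
// A hypothetical Java sketch, assuming C2 vectorizes both loops:

class VectorShiftDemo {
    static void shift(int[] a, int[] b, int n) {
        for (int i = 0; i < a.length; i++) {
            b[i] = a[i] << n;        // LShiftCntV + LShiftVI -> movd + (v)pslld xmm,xmm
        }
        for (int i = 0; i < a.length; i++) {
            b[i] = a[i] << 3;        // LShiftVI with immI8 -> (v)pslld xmm,imm8
        }
    }
}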
6786 // ------------------------------ LeftShift -----------------------------------
6787 
6788 // Shorts/Chars vector left shift
6789 instruct vsll2S(vecS dst, vecS shift) %{
6790   predicate(n->as_Vector()->length() == 2);
6791   match(Set dst (LShiftVS dst shift));
6792   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
6793   ins_encode %{
6794     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
6795   %}
6796   ins_pipe( pipe_slow );
6797 %}
6798 
6799 instruct vsll2S_imm(vecS dst, immI8 shift) %{
6800   predicate(n->as_Vector()->length() == 2);
6801   match(Set dst (LShiftVS dst shift));
6802   format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
6803   ins_encode %{
6804     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
6805   %}
6806   ins_pipe( pipe_slow );
6807 %}
6808 
6809 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
6810   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6811   match(Set dst (LShiftVS src shift));
6812   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
6813   ins_encode %{
6814     int vector_len = 0;
6815     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6816   %}
6817   ins_pipe( pipe_slow );
6818 %}
6819 
6820 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
6821   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6822   match(Set dst (LShiftVS src shift));
6823   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
6824   ins_encode %{
6825     int vector_len = 0;
6826     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6827   %}
6828   ins_pipe( pipe_slow );
6829 %}
6830 
6831 instruct vsll4S(vecD dst, vecS shift) %{
6832   predicate(n->as_Vector()->length() == 4);
6833   match(Set dst (LShiftVS dst shift));
6834   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
6835   ins_encode %{
6836     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
6837   %}
6838   ins_pipe( pipe_slow );
6839 %}
6840 
6841 instruct vsll4S_imm(vecD dst, immI8 shift) %{
6842   predicate(n->as_Vector()->length() == 4);
6843   match(Set dst (LShiftVS dst shift));
6844   format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
6845   ins_encode %{
6846     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
6847   %}
6848   ins_pipe( pipe_slow );
6849 %}
6850 
6851 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
6852   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6853   match(Set dst (LShiftVS src shift));
6854   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
6855   ins_encode %{
6856     int vector_len = 0;
6857     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6858   %}
6859   ins_pipe( pipe_slow );
6860 %}
6861 
6862 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
6863   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6864   match(Set dst (LShiftVS src shift));
6865   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
6866   ins_encode %{
6867     int vector_len = 0;
6868     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6869   %}
6870   ins_pipe( pipe_slow );
6871 %}
6872 
6873 instruct vsll8S(vecX dst, vecS shift) %{
6874   predicate(n->as_Vector()->length() == 8);
6875   match(Set dst (LShiftVS dst shift));
6876   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
6877   ins_encode %{
6878     __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
6879   %}
6880   ins_pipe( pipe_slow );
6881 %}
6882 
6883 instruct vsll8S_imm(vecX dst, immI8 shift) %{
6884   predicate(n->as_Vector()->length() == 8);
6885   match(Set dst (LShiftVS dst shift));
6886   format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
6887   ins_encode %{
6888     __ psllw($dst$$XMMRegister, (int)$shift$$constant);
6889   %}
6890   ins_pipe( pipe_slow );
6891 %}
6892 
6893 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
6894   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6895   match(Set dst (LShiftVS src shift));
6896   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
6897   ins_encode %{
6898     int vector_len = 0;
6899     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6900   %}
6901   ins_pipe( pipe_slow );
6902 %}
6903 
6904 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
6905   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6906   match(Set dst (LShiftVS src shift));
6907   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
6908   ins_encode %{
6909     int vector_len = 0;
6910     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6911   %}
6912   ins_pipe( pipe_slow );
6913 %}
6914 
6915 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
6916   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6917   match(Set dst (LShiftVS src shift));
6918   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
6919   ins_encode %{
6920     int vector_len = 1;
6921     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6922   %}
6923   ins_pipe( pipe_slow );
6924 %}
6925 
6926 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
6927   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6928   match(Set dst (LShiftVS src shift));
6929   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
6930   ins_encode %{
6931     int vector_len = 1;
6932     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6933   %}
6934   ins_pipe( pipe_slow );
6935 %}
6936 
6937 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
6938   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6939   match(Set dst (LShiftVS src shift));
6940   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
6941   ins_encode %{
6942     int vector_len = 2;
6943     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6944   %}
6945   ins_pipe( pipe_slow );
6946 %}
6947 
6948 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
6949   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6950   match(Set dst (LShiftVS src shift));
6951   format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
6952   ins_encode %{
6953     int vector_len = 2;
6954     __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6955   %}
6956   ins_pipe( pipe_slow );
6957 %}
6958 
6959 // Integers vector left shift
6960 instruct vsll2I(vecD dst, vecS shift) %{
6961   predicate(n->as_Vector()->length() == 2);
6962   match(Set dst (LShiftVI dst shift));
6963   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
6964   ins_encode %{
6965     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
6966   %}
6967   ins_pipe( pipe_slow );
6968 %}
6969 
6970 instruct vsll2I_imm(vecD dst, immI8 shift) %{
6971   predicate(n->as_Vector()->length() == 2);
6972   match(Set dst (LShiftVI dst shift));
6973   format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
6974   ins_encode %{
6975     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
6976   %}
6977   ins_pipe( pipe_slow );
6978 %}
6979 
6980 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
6981   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6982   match(Set dst (LShiftVI src shift));
6983   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
6984   ins_encode %{
6985     int vector_len = 0;
6986     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
6987   %}
6988   ins_pipe( pipe_slow );
6989 %}
6990 
6991 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
6992   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6993   match(Set dst (LShiftVI src shift));
6994   format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
6995   ins_encode %{
6996     int vector_len = 0;
6997     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
6998   %}
6999   ins_pipe( pipe_slow );
7000 %}
7001 
7002 instruct vsll4I(vecX dst, vecS shift) %{
7003   predicate(n->as_Vector()->length() == 4);
7004   match(Set dst (LShiftVI dst shift));
7005   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
7006   ins_encode %{
7007     __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
7008   %}
7009   ins_pipe( pipe_slow );
7010 %}
7011 
7012 instruct vsll4I_imm(vecX dst, immI8 shift) %{
7013   predicate(n->as_Vector()->length() == 4);
7014   match(Set dst (LShiftVI dst shift));
7015   format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
7016   ins_encode %{
7017     __ pslld($dst$$XMMRegister, (int)$shift$$constant);
7018   %}
7019   ins_pipe( pipe_slow );
7020 %}
7021 
7022 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
7023   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7024   match(Set dst (LShiftVI src shift));
7025   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
7026   ins_encode %{
7027     int vector_len = 0;
7028     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7029   %}
7030   ins_pipe( pipe_slow );
7031 %}
7032 
7033 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
7034   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7035   match(Set dst (LShiftVI src shift));
7036   format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
7037   ins_encode %{
7038     int vector_len = 0;
7039     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7040   %}
7041   ins_pipe( pipe_slow );
7042 %}
7043 
7044 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
7045   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7046   match(Set dst (LShiftVI src shift));
7047   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
7048   ins_encode %{
7049     int vector_len = 1;
7050     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7051   %}
7052   ins_pipe( pipe_slow );
7053 %}
7054 
7055 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
7056   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7057   match(Set dst (LShiftVI src shift));
7058   format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
7059   ins_encode %{
7060     int vector_len = 1;
7061     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7062   %}
7063   ins_pipe( pipe_slow );
7064 %}
7065 
7066 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
7067   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7068   match(Set dst (LShiftVI src shift));
7069   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
7070   ins_encode %{
7071     int vector_len = 2;
7072     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7073   %}
7074   ins_pipe( pipe_slow );
7075 %}
7076 
7077 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7078   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7079   match(Set dst (LShiftVI src shift));
7080   format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
7081   ins_encode %{
7082     int vector_len = 2;
7083     __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7084   %}
7085   ins_pipe( pipe_slow );
7086 %}
7087 
7088 // Longs vector left shift
7089 instruct vsll2L(vecX dst, vecS shift) %{
7090   predicate(n->as_Vector()->length() == 2);
7091   match(Set dst (LShiftVL dst shift));
7092   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
7093   ins_encode %{
7094     __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
7095   %}
7096   ins_pipe( pipe_slow );
7097 %}
7098 
7099 instruct vsll2L_imm(vecX dst, immI8 shift) %{
7100   predicate(n->as_Vector()->length() == 2);
7101   match(Set dst (LShiftVL dst shift));
7102   format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
7103   ins_encode %{
7104     __ psllq($dst$$XMMRegister, (int)$shift$$constant);
7105   %}
7106   ins_pipe( pipe_slow );
7107 %}
7108 
7109 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
7110   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7111   match(Set dst (LShiftVL src shift));
7112   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
7113   ins_encode %{
7114     int vector_len = 0;
7115     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7116   %}
7117   ins_pipe( pipe_slow );
7118 %}
7119 
7120 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
7121   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7122   match(Set dst (LShiftVL src shift));
7123   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
7124   ins_encode %{
7125     int vector_len = 0;
7126     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7127   %}
7128   ins_pipe( pipe_slow );
7129 %}
7130 
7131 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
7132   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7133   match(Set dst (LShiftVL src shift));
7134   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
7135   ins_encode %{
7136     int vector_len = 1;
7137     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7138   %}
7139   ins_pipe( pipe_slow );
7140 %}
7141 
7142 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
7143   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7144   match(Set dst (LShiftVL src shift));
7145   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
7146   ins_encode %{
7147     int vector_len = 1;
7148     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7149   %}
7150   ins_pipe( pipe_slow );
7151 %}
7152 
7153 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
7154   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7155   match(Set dst (LShiftVL src shift));
7156   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
7157   ins_encode %{
7158     int vector_len = 2;
7159     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7160   %}
7161   ins_pipe( pipe_slow );
7162 %}
7163 
7164 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7165   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7166   match(Set dst (LShiftVL src shift));
7167   format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
7168   ins_encode %{
7169     int vector_len = 2;
7170     __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7171   %}
7172   ins_pipe( pipe_slow );
7173 %}
7174 
7175 // ----------------------- LogicalRightShift -----------------------------------
7176 
7177 // Shorts vector logical right shift produces an incorrect Java result
7178 // for negative data, because Java promotes a short value to an int with
7179 // sign extension before the shift. Char vectors are fine, since chars are
7180 // unsigned values.
7181 
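// Editorial note (not in x86.ad): a scalar illustration of the comment above.
// Java promotes the short to int with sign extension before >>>, so narrowing
// the scalar result back to short disagrees with what a packed 16-bit psrlw
// lane would produce; chars are zero-extended, so they agree.

class ShortShiftDemo {
    public static void main(String[] args) {
        short s = (short) 0xFFFC;                     // -4
        short scalar = (short) (s >>> 2);             // sign-extended shift, narrows to -1
        short lane   = (short) ((s & 0xFFFF) >>> 2);  // what a psrlw lane computes: 16383
        System.out.println(scalar + " vs " + lane);   // prints "-1 vs 16383"

        char c = (char) 0xFFFC;
        System.out.println((int) (char) (c >>> 2));   // 16383, matches the packed result
    }
}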
7182 instruct vsrl2S(vecS dst, vecS shift) %{
7183   predicate(n->as_Vector()->length() == 2);
7184   match(Set dst (URShiftVS dst shift));
7185   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
7186   ins_encode %{
7187     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7188   %}
7189   ins_pipe( pipe_slow );
7190 %}
7191 
7192 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
7193   predicate(n->as_Vector()->length() == 2);
7194   match(Set dst (URShiftVS dst shift));
7195   format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
7196   ins_encode %{
7197     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7198   %}
7199   ins_pipe( pipe_slow );
7200 %}
7201 
7202 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
7203   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7204   match(Set dst (URShiftVS src shift));
7205   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
7206   ins_encode %{
7207     int vector_len = 0;
7208     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7209   %}
7210   ins_pipe( pipe_slow );
7211 %}
7212 
7213 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
7214   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7215   match(Set dst (URShiftVS src shift));
7216   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
7217   ins_encode %{
7218     int vector_len = 0;
7219     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7220   %}
7221   ins_pipe( pipe_slow );
7222 %}
7223 
7224 instruct vsrl4S(vecD dst, vecS shift) %{
7225   predicate(n->as_Vector()->length() == 4);
7226   match(Set dst (URShiftVS dst shift));
7227   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
7228   ins_encode %{
7229     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7230   %}
7231   ins_pipe( pipe_slow );
7232 %}
7233 
7234 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
7235   predicate(n->as_Vector()->length() == 4);
7236   match(Set dst (URShiftVS dst shift));
7237   format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
7238   ins_encode %{
7239     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7240   %}
7241   ins_pipe( pipe_slow );
7242 %}
7243 
7244 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
7245   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7246   match(Set dst (URShiftVS src shift));
7247   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
7248   ins_encode %{
7249     int vector_len = 0;
7250     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7251   %}
7252   ins_pipe( pipe_slow );
7253 %}
7254 
7255 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
7256   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7257   match(Set dst (URShiftVS src shift));
7258   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
7259   ins_encode %{
7260     int vector_len = 0;
7261     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7262   %}
7263   ins_pipe( pipe_slow );
7264 %}
7265 
7266 instruct vsrl8S(vecX dst, vecS shift) %{
7267   predicate(n->as_Vector()->length() == 8);
7268   match(Set dst (URShiftVS dst shift));
7269   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
7270   ins_encode %{
7271     __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
7272   %}
7273   ins_pipe( pipe_slow );
7274 %}
7275 
7276 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
7277   predicate(n->as_Vector()->length() == 8);
7278   match(Set dst (URShiftVS dst shift));
7279   format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
7280   ins_encode %{
7281     __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
7282   %}
7283   ins_pipe( pipe_slow );
7284 %}
7285 
7286 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
7287   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7288   match(Set dst (URShiftVS src shift));
7289   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
7290   ins_encode %{
7291     int vector_len = 0;
7292     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7293   %}
7294   ins_pipe( pipe_slow );
7295 %}
7296 
7297 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
7298   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7299   match(Set dst (URShiftVS src shift));
7300   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
7301   ins_encode %{
7302     int vector_len = 0;
7303     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7304   %}
7305   ins_pipe( pipe_slow );
7306 %}
7307 
7308 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
7309   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7310   match(Set dst (URShiftVS src shift));
7311   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
7312   ins_encode %{
7313     int vector_len = 1;
7314     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7315   %}
7316   ins_pipe( pipe_slow );
7317 %}
7318 
7319 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
7320   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7321   match(Set dst (URShiftVS src shift));
7322   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
7323   ins_encode %{
7324     int vector_len = 1;
7325     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7326   %}
7327   ins_pipe( pipe_slow );
7328 %}
7329 
7330 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
7331   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7332   match(Set dst (URShiftVS src shift));
7333   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
7334   ins_encode %{
7335     int vector_len = 2;
7336     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7337   %}
7338   ins_pipe( pipe_slow );
7339 %}
7340 
7341 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7342   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7343   match(Set dst (URShiftVS src shift));
7344   format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
7345   ins_encode %{
7346     int vector_len = 2;
7347     __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7348   %}
7349   ins_pipe( pipe_slow );
7350 %}
7351 
7352 // Integers vector logical right shift
7353 instruct vsrl2I(vecD dst, vecS shift) %{
7354   predicate(n->as_Vector()->length() == 2);
7355   match(Set dst (URShiftVI dst shift));
7356   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
7357   ins_encode %{
7358     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
7359   %}
7360   ins_pipe( pipe_slow );
7361 %}
7362 
7363 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
7364   predicate(n->as_Vector()->length() == 2);
7365   match(Set dst (URShiftVI dst shift));
7366   format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
7367   ins_encode %{
7368     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
7369   %}
7370   ins_pipe( pipe_slow );
7371 %}
7372 
7373 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
7374   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7375   match(Set dst (URShiftVI src shift));
7376   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
7377   ins_encode %{
7378     int vector_len = 0;
7379     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7380   %}
7381   ins_pipe( pipe_slow );
7382 %}
7383 
7384 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
7385   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7386   match(Set dst (URShiftVI src shift));
7387   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
7388   ins_encode %{
7389     int vector_len = 0;
7390     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7391   %}
7392   ins_pipe( pipe_slow );
7393 %}
7394 
7395 instruct vsrl4I(vecX dst, vecS shift) %{
7396   predicate(n->as_Vector()->length() == 4);
7397   match(Set dst (URShiftVI dst shift));
7398   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
7399   ins_encode %{
7400     __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
7401   %}
7402   ins_pipe( pipe_slow );
7403 %}
7404 
7405 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
7406   predicate(n->as_Vector()->length() == 4);
7407   match(Set dst (URShiftVI dst shift));
7408   format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
7409   ins_encode %{
7410     __ psrld($dst$$XMMRegister, (int)$shift$$constant);
7411   %}
7412   ins_pipe( pipe_slow );
7413 %}
7414 
7415 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
7416   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7417   match(Set dst (URShiftVI src shift));
7418   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
7419   ins_encode %{
7420     int vector_len = 0;
7421     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7422   %}
7423   ins_pipe( pipe_slow );
7424 %}
7425 
7426 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
7427   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7428   match(Set dst (URShiftVI src shift));
7429   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
7430   ins_encode %{
7431     int vector_len = 0;
7432     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7433   %}
7434   ins_pipe( pipe_slow );
7435 %}
7436 
7437 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
7438   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7439   match(Set dst (URShiftVI src shift));
7440   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
7441   ins_encode %{
7442     int vector_len = 1;
7443     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7444   %}
7445   ins_pipe( pipe_slow );
7446 %}
7447 
7448 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
7449   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7450   match(Set dst (URShiftVI src shift));
7451   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
7452   ins_encode %{
7453     int vector_len = 1;
7454     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7455   %}
7456   ins_pipe( pipe_slow );
7457 %}
7458 
7459 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
7460   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7461   match(Set dst (URShiftVI src shift));
7462   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
7463   ins_encode %{
7464     int vector_len = 2;
7465     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7466   %}
7467   ins_pipe( pipe_slow );
7468 %}
7469 
7470 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7471   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7472   match(Set dst (URShiftVI src shift));
7473   format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
7474   ins_encode %{
7475     int vector_len = 2;
7476     __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7477   %}
7478   ins_pipe( pipe_slow );
7479 %}
7480 
7481 // Longs vector logical right shift
7482 instruct vsrl2L(vecX dst, vecS shift) %{
7483   predicate(n->as_Vector()->length() == 2);
7484   match(Set dst (URShiftVL dst shift));
7485   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
7486   ins_encode %{
7487     __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
7488   %}
7489   ins_pipe( pipe_slow );
7490 %}
7491 
7492 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
7493   predicate(n->as_Vector()->length() == 2);
7494   match(Set dst (URShiftVL dst shift));
7495   format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
7496   ins_encode %{
7497     __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
7498   %}
7499   ins_pipe( pipe_slow );
7500 %}
7501 
7502 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
7503   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7504   match(Set dst (URShiftVL src shift));
7505   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
7506   ins_encode %{
7507     int vector_len = 0;
7508     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7509   %}
7510   ins_pipe( pipe_slow );
7511 %}
7512 
7513 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
7514   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7515   match(Set dst (URShiftVL src shift));
7516   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
7517   ins_encode %{
7518     int vector_len = 0;
7519     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7520   %}
7521   ins_pipe( pipe_slow );
7522 %}
7523 
7524 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
7525   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7526   match(Set dst (URShiftVL src shift));
7527   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
7528   ins_encode %{
7529     int vector_len = 1;
7530     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7531   %}
7532   ins_pipe( pipe_slow );
7533 %}
7534 
7535 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
7536   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7537   match(Set dst (URShiftVL src shift));
7538   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
7539   ins_encode %{
7540     int vector_len = 1;
7541     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7542   %}
7543   ins_pipe( pipe_slow );
7544 %}
7545 
7546 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
7547   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7548   match(Set dst (URShiftVL src shift));
7549   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
7550   ins_encode %{
7551     int vector_len = 2;
7552     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7553   %}
7554   ins_pipe( pipe_slow );
7555 %}
7556 
7557 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7558   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7559   match(Set dst (URShiftVL src shift));
7560   format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed8L" %}
7561   ins_encode %{
7562     int vector_len = 2;
7563     __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7564   %}
7565   ins_pipe( pipe_slow );
7566 %}
7567 
7568 // ------------------- ArithmeticRightShift -----------------------------------
7569 
7570 // Shorts/Chars vector arithmetic right shift
7571 instruct vsra2S(vecS dst, vecS shift) %{
7572   predicate(n->as_Vector()->length() == 2);
7573   match(Set dst (RShiftVS dst shift));
7574   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
7575   ins_encode %{
7576     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
7577   %}
7578   ins_pipe( pipe_slow );
7579 %}
7580 
7581 instruct vsra2S_imm(vecS dst, immI8 shift) %{
7582   predicate(n->as_Vector()->length() == 2);
7583   match(Set dst (RShiftVS dst shift));
7584   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
7585   ins_encode %{
7586     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
7587   %}
7588   ins_pipe( pipe_slow );
7589 %}
7590 
7591 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
7592   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7593   match(Set dst (RShiftVS src shift));
7594   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
7595   ins_encode %{
7596     int vector_len = 0;
7597     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7598   %}
7599   ins_pipe( pipe_slow );
7600 %}
7601 
7602 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
7603   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7604   match(Set dst (RShiftVS src shift));
7605   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
7606   ins_encode %{
7607     int vector_len = 0;
7608     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7609   %}
7610   ins_pipe( pipe_slow );
7611 %}
7612 
7613 instruct vsra4S(vecD dst, vecS shift) %{
7614   predicate(n->as_Vector()->length() == 4);
7615   match(Set dst (RShiftVS dst shift));
7616   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
7617   ins_encode %{
7618     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
7619   %}
7620   ins_pipe( pipe_slow );
7621 %}
7622 
7623 instruct vsra4S_imm(vecD dst, immI8 shift) %{
7624   predicate(n->as_Vector()->length() == 4);
7625   match(Set dst (RShiftVS dst shift));
7626   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
7627   ins_encode %{
7628     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
7629   %}
7630   ins_pipe( pipe_slow );
7631 %}
7632 
7633 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
7634   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7635   match(Set dst (RShiftVS src shift));
7636   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
7637   ins_encode %{
7638     int vector_len = 0;
7639     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7640   %}
7641   ins_pipe( pipe_slow );
7642 %}
7643 
7644 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
7645   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7646   match(Set dst (RShiftVS src shift));
7647   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
7648   ins_encode %{
7649     int vector_len = 0;
7650     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7651   %}
7652   ins_pipe( pipe_slow );
7653 %}
7654 
7655 instruct vsra8S(vecX dst, vecS shift) %{
7656   predicate(n->as_Vector()->length() == 8);
7657   match(Set dst (RShiftVS dst shift));
7658   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
7659   ins_encode %{
7660     __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
7661   %}
7662   ins_pipe( pipe_slow );
7663 %}
7664 
7665 instruct vsra8S_imm(vecX dst, immI8 shift) %{
7666   predicate(n->as_Vector()->length() == 8);
7667   match(Set dst (RShiftVS dst shift));
7668   format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
7669   ins_encode %{
7670     __ psraw($dst$$XMMRegister, (int)$shift$$constant);
7671   %}
7672   ins_pipe( pipe_slow );
7673 %}
7674 
7675 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
7676   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7677   match(Set dst (RShiftVS src shift));
7678   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
7679   ins_encode %{
7680     int vector_len = 0;
7681     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7682   %}
7683   ins_pipe( pipe_slow );
7684 %}
7685 
7686 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
7687   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7688   match(Set dst (RShiftVS src shift));
7689   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
7690   ins_encode %{
7691     int vector_len = 0;
7692     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7693   %}
7694   ins_pipe( pipe_slow );
7695 %}
7696 
7697 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
7698   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7699   match(Set dst (RShiftVS src shift));
7700   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
7701   ins_encode %{
7702     int vector_len = 1;
7703     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7704   %}
7705   ins_pipe( pipe_slow );
7706 %}
7707 
7708 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
7709   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7710   match(Set dst (RShiftVS src shift));
7711   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
7712   ins_encode %{
7713     int vector_len = 1;
7714     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7715   %}
7716   ins_pipe( pipe_slow );
7717 %}
7718 
7719 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
7720   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7721   match(Set dst (RShiftVS src shift));
7722   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
7723   ins_encode %{
7724     int vector_len = 2;
7725     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7726   %}
7727   ins_pipe( pipe_slow );
7728 %}
7729 
7730 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7731   predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7732   match(Set dst (RShiftVS src shift));
7733   format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
7734   ins_encode %{
7735     int vector_len = 2;
7736     __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7737   %}
7738   ins_pipe( pipe_slow );
7739 %}
7740 
7741 // Integers vector arithmetic right shift
7742 instruct vsra2I(vecD dst, vecS shift) %{
7743   predicate(n->as_Vector()->length() == 2);
7744   match(Set dst (RShiftVI dst shift));
7745   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
7746   ins_encode %{
7747     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
7748   %}
7749   ins_pipe( pipe_slow );
7750 %}
7751 
7752 instruct vsra2I_imm(vecD dst, immI8 shift) %{
7753   predicate(n->as_Vector()->length() == 2);
7754   match(Set dst (RShiftVI dst shift));
7755   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
7756   ins_encode %{
7757     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
7758   %}
7759   ins_pipe( pipe_slow );
7760 %}
7761 
7762 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
7763   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7764   match(Set dst (RShiftVI src shift));
7765   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
7766   ins_encode %{
7767     int vector_len = 0;
7768     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7769   %}
7770   ins_pipe( pipe_slow );
7771 %}
7772 
7773 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
7774   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7775   match(Set dst (RShiftVI src shift));
7776   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
7777   ins_encode %{
7778     int vector_len = 0;
7779     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7780   %}
7781   ins_pipe( pipe_slow );
7782 %}
7783 
7784 instruct vsra4I(vecX dst, vecS shift) %{
7785   predicate(n->as_Vector()->length() == 4);
7786   match(Set dst (RShiftVI dst shift));
7787   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
7788   ins_encode %{
7789     __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
7790   %}
7791   ins_pipe( pipe_slow );
7792 %}
7793 
7794 instruct vsra4I_imm(vecX dst, immI8 shift) %{
7795   predicate(n->as_Vector()->length() == 4);
7796   match(Set dst (RShiftVI dst shift));
7797   format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
7798   ins_encode %{
7799     __ psrad($dst$$XMMRegister, (int)$shift$$constant);
7800   %}
7801   ins_pipe( pipe_slow );
7802 %}
7803 
7804 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
7805   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7806   match(Set dst (RShiftVI src shift));
7807   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
7808   ins_encode %{
7809     int vector_len = 0;
7810     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7811   %}
7812   ins_pipe( pipe_slow );
7813 %}
7814 
7815 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
7816   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7817   match(Set dst (RShiftVI src shift));
7818   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
7819   ins_encode %{
7820     int vector_len = 0;
7821     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7822   %}
7823   ins_pipe( pipe_slow );
7824 %}
7825 
7826 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
7827   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7828   match(Set dst (RShiftVI src shift));
7829   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
7830   ins_encode %{
7831     int vector_len = 1;
7832     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7833   %}
7834   ins_pipe( pipe_slow );
7835 %}
7836 
7837 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
7838   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7839   match(Set dst (RShiftVI src shift));
7840   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
7841   ins_encode %{
7842     int vector_len = 1;
7843     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7844   %}
7845   ins_pipe( pipe_slow );
7846 %}
7847 
7848 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
7849   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7850   match(Set dst (RShiftVI src shift));
7851   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
7852   ins_encode %{
7853     int vector_len = 2;
7854     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7855   %}
7856   ins_pipe( pipe_slow );
7857 %}
7858 
7859 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7860   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7861   match(Set dst (RShiftVI src shift));
7862   format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
7863   ins_encode %{
7864     int vector_len = 2;
7865     __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7866   %}
7867   ins_pipe( pipe_slow );
7868 %}
7869 
7870 // There are no vector arithmetic right shift instructions for longs.
7871 
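// Editorial note (not in x86.ad): SSE/AVX have no packed arithmetic right
// shift for 64-bit lanes (vpsraq is EVEX/AVX-512 only), so a loop like this
// hypothetical sketch is not matched by any RShiftVL instruct here and stays
// on the scalar sar path:

class LongShiftDemo {
    static void shift(long[] a, long[] b) {
        for (int i = 0; i < a.length; i++) {
            b[i] = a[i] >> 3;
        }
    }
}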
7872 
7873 // --------------------------------- AND --------------------------------------
7874 
7875 instruct vand4B(vecS dst, vecS src) %{
7876   predicate(n->as_Vector()->length_in_bytes() == 4);
7877   match(Set dst (AndV dst src));
7878   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
7879   ins_encode %{
7880     __ pand($dst$$XMMRegister, $src$$XMMRegister);
7881   %}
7882   ins_pipe( pipe_slow );
7883 %}
7884 
7885 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
7886   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
7887   match(Set dst (AndV src1 src2));
7888   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
7889   ins_encode %{
7890     int vector_len = 0;
7891     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7892   %}
7893   ins_pipe( pipe_slow );
7894 %}
7895 
7896 instruct vand8B(vecD dst, vecD src) %{
7897   predicate(n->as_Vector()->length_in_bytes() == 8);
7898   match(Set dst (AndV dst src));
7899   format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
7900   ins_encode %{
7901     __ pand($dst$$XMMRegister, $src$$XMMRegister);
7902   %}
7903   ins_pipe( pipe_slow );
7904 %}
7905 
7906 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
7907   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
7908   match(Set dst (AndV src1 src2));
7909   format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
7910   ins_encode %{
7911     int vector_len = 0;
7912     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7913   %}
7914   ins_pipe( pipe_slow );
7915 %}
7916 
7917 instruct vand16B(vecX dst, vecX src) %{
7918   predicate(n->as_Vector()->length_in_bytes() == 16);
7919   match(Set dst (AndV dst src));
7920   format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
7921   ins_encode %{
7922     __ pand($dst$$XMMRegister, $src$$XMMRegister);
7923   %}
7924   ins_pipe( pipe_slow );
7925 %}
7926 
7927 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
7928   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
7929   match(Set dst (AndV src1 src2));
7930   format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
7931   ins_encode %{
7932     int vector_len = 0;
7933     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7934   %}
7935   ins_pipe( pipe_slow );
7936 %}
7937 
7938 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
7939   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
7940   match(Set dst (AndV src (LoadVector mem)));
7941   format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
7942   ins_encode %{
7943     int vector_len = 0;
7944     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7945   %}
7946   ins_pipe( pipe_slow );
7947 %}
7948 
7949 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
7950   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
7951   match(Set dst (AndV src1 src2));
7952   format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
7953   ins_encode %{
7954     int vector_len = 1;
7955     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7956   %}
7957   ins_pipe( pipe_slow );
7958 %}
7959 
7960 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
7961   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
7962   match(Set dst (AndV src (LoadVector mem)));
7963   format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
7964   ins_encode %{
7965     int vector_len = 1;
7966     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7967   %}
7968   ins_pipe( pipe_slow );
7969 %}
7970 
7971 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
7972   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
7973   match(Set dst (AndV src1 src2));
7974   format %{ "vpand   $dst,$src1,$src2\t! and vectors (64 bytes)" %}
7975   ins_encode %{
7976     int vector_len = 2;
7977     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7978   %}
7979   ins_pipe( pipe_slow );
7980 %}
7981 
7982 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
7983   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
7984   match(Set dst (AndV src (LoadVector mem)));
7985   format %{ "vpand   $dst,$src,$mem\t! and vectors (64 bytes)" %}
7986   ins_encode %{
7987     int vector_len = 2;
7988     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7989   %}
7990   ins_pipe( pipe_slow );
7991 %}
7992 
7993 // --------------------------------- OR ---------------------------------------
7994 
7995 instruct vor4B(vecS dst, vecS src) %{
7996   predicate(n->as_Vector()->length_in_bytes() == 4);
7997   match(Set dst (OrV dst src));
7998   format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
7999   ins_encode %{
8000     __ por($dst$$XMMRegister, $src$$XMMRegister);
8001   %}
8002   ins_pipe( pipe_slow );
8003 %}
8004 
8005 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
8006   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8007   match(Set dst (OrV src1 src2));
8008   format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
8009   ins_encode %{
8010     int vector_len = 0;
8011     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8012   %}
8013   ins_pipe( pipe_slow );
8014 %}
8015 
8016 instruct vor8B(vecD dst, vecD src) %{
8017   predicate(n->as_Vector()->length_in_bytes() == 8);
8018   match(Set dst (OrV dst src));
8019   format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
8020   ins_encode %{
8021     __ por($dst$$XMMRegister, $src$$XMMRegister);
8022   %}
8023   ins_pipe( pipe_slow );
8024 %}
8025 
8026 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
8027   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8028   match(Set dst (OrV src1 src2));
8029   format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
8030   ins_encode %{
8031     int vector_len = 0;
8032     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8033   %}
8034   ins_pipe( pipe_slow );
8035 %}
8036 
8037 instruct vor16B(vecX dst, vecX src) %{
8038   predicate(n->as_Vector()->length_in_bytes() == 16);
8039   match(Set dst (OrV dst src));
8040   format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
8041   ins_encode %{
8042     __ por($dst$$XMMRegister, $src$$XMMRegister);
8043   %}
8044   ins_pipe( pipe_slow );
8045 %}
8046 
8047 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
8048   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8049   match(Set dst (OrV src1 src2));
8050   format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
8051   ins_encode %{
8052     int vector_len = 0;
8053     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8054   %}
8055   ins_pipe( pipe_slow );
8056 %}
8057 
8058 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
8059   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8060   match(Set dst (OrV src (LoadVector mem)));
8061   format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
8062   ins_encode %{
8063     int vector_len = 0;
8064     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8065   %}
8066   ins_pipe( pipe_slow );
8067 %}
8068 
8069 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
8070   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8071   match(Set dst (OrV src1 src2));
8072   format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
8073   ins_encode %{
8074     int vector_len = 1;
8075     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8076   %}
8077   ins_pipe( pipe_slow );
8078 %}
8079 
8080 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
8081   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8082   match(Set dst (OrV src (LoadVector mem)));
8083   format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
8084   ins_encode %{
8085     int vector_len = 1;
8086     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8087   %}
8088   ins_pipe( pipe_slow );
8089 %}
8090 
8091 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
8092   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8093   match(Set dst (OrV src1 src2));
8094   format %{ "vpor    $dst,$src1,$src2\t! or vectors (64 bytes)" %}
8095   ins_encode %{
8096     int vector_len = 2;
8097     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8098   %}
8099   ins_pipe( pipe_slow );
8100 %}
8101 
8102 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
8103   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8104   match(Set dst (OrV src (LoadVector mem)));
8105   format %{ "vpor    $dst,$src,$mem\t! or vectors (64 bytes)" %}
8106   ins_encode %{
8107     int vector_len = 2;
8108     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8109   %}
8110   ins_pipe( pipe_slow );
8111 %}
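// Shared pattern for the vector OR/XOR rules in this file: the SSE forms
// (por/pxor) update dst in place and are used when AVX is not in play, while
// the AVX forms (vpor/vpxor) take two separate sources or a folded memory
// operand, with vector_len selecting the encoding width (0 = 128-bit,
// 1 = 256-bit, 2 = 512-bit EVEX) in line with the UseAVX > 0 / > 1 / > 2
// predicates on each rule.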
8112 
8113 // --------------------------------- XOR --------------------------------------
8114 
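// A hypothetical Java loop of the shape below (method name and arrays are
// illustrative only) is the kind of code the SuperWord pass may auto-vectorize
// into XorV/LoadVector ideal nodes, which the rules that follow then match to
// pxor or vpxor depending on UseAVX and the vector width chosen:
//
//   static void xorBytes(byte[] a, byte[] b, byte[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = (byte)(a[i] ^ b[i]);   // may become XorV over 16/32/64-byte lanes
//     }
//   }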
8115 instruct vxor4B(vecS dst, vecS src) %{
8116   predicate(n->as_Vector()->length_in_bytes() == 4);
8117   match(Set dst (XorV dst src));
8118   format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
8119   ins_encode %{
8120     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8121   %}
8122   ins_pipe( pipe_slow );
8123 %}
8124 
8125 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
8126   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8127   match(Set dst (XorV src1 src2));
8128   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
8129   ins_encode %{
8130     int vector_len = 0;
8131     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8132   %}
8133   ins_pipe( pipe_slow );
8134 %}
8135 
8136 instruct vxor8B(vecD dst, vecD src) %{
8137   predicate(n->as_Vector()->length_in_bytes() == 8);
8138   match(Set dst (XorV dst src));
8139   format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
8140   ins_encode %{
8141     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8142   %}
8143   ins_pipe( pipe_slow );
8144 %}
8145 
8146 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
8147   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8148   match(Set dst (XorV src1 src2));
8149   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
8150   ins_encode %{
8151     int vector_len = 0;
8152     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8153   %}
8154   ins_pipe( pipe_slow );
8155 %}
8156 
8157 instruct vxor16B(vecX dst, vecX src) %{
8158   predicate(n->as_Vector()->length_in_bytes() == 16);
8159   match(Set dst (XorV dst src));
8160   format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
8161   ins_encode %{
8162     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8163   %}
8164   ins_pipe( pipe_slow );
8165 %}
8166 
8167 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
8168   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8169   match(Set dst (XorV src1 src2));
8170   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
8171   ins_encode %{
8172     int vector_len = 0;
8173     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8174   %}
8175   ins_pipe( pipe_slow );
8176 %}
8177 
8178 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
8179   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8180   match(Set dst (XorV src (LoadVector mem)));
8181   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
8182   ins_encode %{
8183     int vector_len = 0;
8184     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8185   %}
8186   ins_pipe( pipe_slow );
8187 %}
8188 
8189 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
8190   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8191   match(Set dst (XorV src1 src2));
8192   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
8193   ins_encode %{
8194     int vector_len = 1;
8195     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8196   %}
8197   ins_pipe( pipe_slow );
8198 %}
8199 
8200 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
8201   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8202   match(Set dst (XorV src (LoadVector mem)));
8203   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
8204   ins_encode %{
8205     int vector_len = 1;
8206     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8207   %}
8208   ins_pipe( pipe_slow );
8209 %}
8210 
8211 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
8212   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8213   match(Set dst (XorV src1 src2));
8214   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
8215   ins_encode %{
8216     int vector_len = 2;
8217     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8218   %}
8219   ins_pipe( pipe_slow );
8220 %}
8221 
8222 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
8223   predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8224   match(Set dst (XorV src (LoadVector mem)));
8225   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (64 bytes)" %}
8226   ins_encode %{
8227     int vector_len = 2;
8228     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8229   %}
8230   ins_pipe( pipe_slow );
8231 %}
8232 