1355 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
1356 __ end_a_stub();
1357 return offset;
1358 }
1359
1360
1361 //=============================================================================
1362
1363 // Float masks come from different places depending on platform.
1364 #ifdef _LP64
// 64-bit: sign-mask / sign-flip constants are materialized by the stub
// generator (StubRoutines::x86).
1365 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
1366 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
1367 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1368 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1369 #else
// 32-bit: constants come from statically allocated pools.
1370 static address float_signmask() { return (address)float_signmask_pool; }
1371 static address float_signflip() { return (address)float_signflip_pool; }
1372 static address double_signmask() { return (address)double_signmask_pool; }
1373 static address double_signflip() { return (address)double_signflip_pool; }
1374 #endif
1375
1376
// Returns true iff the matcher has an implementation for the given ideal
// opcode on this platform with the current CPU feature / flag settings.
// Opcodes without an entry in the switch are supported by default.
1377 const bool Matcher::match_rule_supported(int opcode) {
1378 if (!has_match_rule(opcode))
1379 return false;
1380
1381 bool ret_value = true;
1382 switch (opcode) {
1383 case Op_PopCountI:
1384 case Op_PopCountL:
1385 if (!UsePopCountInstruction)
1386 ret_value = false;
1387 break;
1388 case Op_PopCountVI:
// Vector popcount additionally requires AVX512_VPOPCNTDQ.
1389 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
1390 ret_value = false;
1391 break;
1392 case Op_MulVI:
1393 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1394 ret_value = false;
1395 break;
1396 case Op_MulVL:
1397 case Op_MulReductionVL:
1398 if (VM_Version::supports_avx512dq() == false)
1399 ret_value = false;
1400 break;
1401 case Op_AddReductionVL:
1402 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
1403 ret_value = false;
1404 break;
1405 case Op_AddReductionVI:
1406 if (UseSSE < 3) // requires at least SSE3
1407 ret_value = false;
1408 break;
1409 case Op_MulReductionVI:
1410 if (UseSSE < 4) // requires at least SSE4
1411 ret_value = false;
1412 break;
1413 case Op_AddReductionVF:
1414 case Op_AddReductionVD:
1415 case Op_MulReductionVF:
1416 case Op_MulReductionVD:
1417 if (UseSSE < 1) // requires at least SSE
1418 ret_value = false;
1419 break;
1420 case Op_SqrtVD:
1421 case Op_SqrtVF:
1422 if (UseAVX < 1) // enabled for AVX only
1423 ret_value = false;
1424 break;
// NOTE(review): lines 1425-1429 were missing from this listing, leaving the
// 'ret_value = false; break;' at 1430-1431 orphaned as unreachable dead code.
// Restored here: 64-bit compare-and-swap requires CMPXCHG8B support.
1425 case Op_CompareAndSwapL:
1426 #ifdef _LP64
1427 case Op_CompareAndSwapP:
1428 #endif
1429 if (VM_Version::supports_cx8() == false)
1430 ret_value = false;
1431 break;
1432 case Op_CMoveVF:
1433 case Op_CMoveVD:
// Vector conditional move is implemented only for AVX1/AVX2.
1434 if (UseAVX < 1 || UseAVX > 2)
1435 ret_value = false;
1436 break;
1437 case Op_StrIndexOf:
1438 if (!UseSSE42Intrinsics)
1439 ret_value = false;
1440 break;
1441 case Op_StrIndexOfChar:
1442 if (!UseSSE42Intrinsics)
1443 ret_value = false;
1444 break;
1445 case Op_OnSpinWait:
1446 if (VM_Version::supports_on_spin_wait() == false)
1447 ret_value = false;
1448 break;
1449 case Op_MulAddVS2VI:
1450 if (UseSSE < 2)
1451 ret_value = false;
1452 break;
1453 #ifdef _LP64
1454 case Op_MaxD:
1455 case Op_MaxF:
1456 case Op_MinD:
1457 case Op_MinF:
1458 if (UseAVX < 1) // enabled for AVX only
1459 ret_value = false;
1460 break;
1461 #endif
1462 }
1463
1464 return ret_value; // Per default match rules are supported.
1465 }
1466
// Returns true iff the rule for 'opcode' is supported at the given vector
// length (element count). Refines match_rule_supported() with per-length
// restrictions; lengths not listed inherit the base answer.
1467 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
1468 // identify extra cases that we might want to provide match rules for
1469 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
1470 bool ret_value = match_rule_supported(opcode);
1471 if (ret_value) {
1472 switch (opcode) {
1473 case Op_AddVB:
1474 case Op_SubVB:
// 64 byte elements (512-bit vector): byte ops need AVX512BW.
1475 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
1476 ret_value = false;
1477 break;
1478 case Op_URShiftVS:
1479 case Op_RShiftVS:
1480 case Op_LShiftVS:
1481 case Op_MulVS:
1482 case Op_AddVS:
1483 case Op_SubVS:
// 32 short elements (512-bit vector): word ops need AVX512BW.
1484 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
1485 ret_value = false;
1486 break;
1487 case Op_CMoveVF:
// CMoveVF is implemented only for 8 floats.
1488 if (vlen != 8)
1489 ret_value = false;
1490 break;
1491 case Op_CMoveVD:
// CMoveVD is implemented only for 4 doubles.
1492 if (vlen != 4)
1493 ret_value = false;
1494 break;
1495 }
1496 }
1497
1498 return ret_value; // Per default match rules are supported.
1499 }
1500
// True iff predicated (masked) vector operations are available: requires
// EVEX encoding (UseAVX > 2) together with the AVX512VL feature.
1501 const bool Matcher::has_predicated_vectors(void) {
1502 bool ret_value = false;
1503 if (UseAVX > 2) {
1504 ret_value = VM_Version::supports_avx512vl();
1505 }
1506
1507 return ret_value;
1508 }
1509
1510 const int Matcher::float_pressure(int default_pressure_threshold) {
7285 ins_encode %{
7286 int vector_len = 2;
7287 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7288 %}
7289 ins_pipe( pipe_slow );
7290 %}
7291
// Subtract packed 8 doubles (512-bit EVEX form, vector_len = 2) with the
// subtrahend loaded directly from memory.
7292 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
7293 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7294 match(Set dst (SubVD src (LoadVector mem)));
7295 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
7296 ins_encode %{
7297 int vector_len = 2;
7298 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7299 %}
7300 ins_pipe( pipe_slow );
7301 %}
7302
7303 // --------------------------------- MUL --------------------------------------
7304
7305 // Shorts/Chars vector mul
// Multiply packed 2 shorts/chars in place — destructive SSE form, used only
// when AVX is unavailable (UseAVX == 0).
7306 instruct vmul2S(vecS dst, vecS src) %{
7307 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
7308 match(Set dst (MulVS dst src));
7309 format %{ "pmullw $dst,$src\t! mul packed2S" %}
7310 ins_encode %{
7311 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
7312 %}
7313 ins_pipe( pipe_slow );
7314 %}
7315
7316 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
7317 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7318 match(Set dst (MulVS src1 src2));
7319 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
7320 ins_encode %{
7321 int vector_len = 0;
7322 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7323 %}
7324 ins_pipe( pipe_slow );
8007 match(Set dst (DivVD src1 src2));
8008 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
8009 ins_encode %{
8010 int vector_len = 2;
8011 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8012 %}
8013 ins_pipe( pipe_slow );
8014 %}
8015
// Divide packed 8 doubles (512-bit EVEX form, vector_len = 2) with the
// divisor loaded directly from memory.
8016 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
8017 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8018 match(Set dst (DivVD src (LoadVector mem)));
8019 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
8020 ins_encode %{
8021 int vector_len = 2;
8022 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8023 %}
8024 ins_pipe( pipe_slow );
8025 %}
8026
8027 // ------------------------------ Shift ---------------------------------------
8028
8029 // Left and right shift count vectors are the same on x86
8030 // (only lowest bits of xmm reg are used for count).
// Materialize a vector shift count: move the GP-register count into an xmm
// register. A single rule serves both LShiftCntV and RShiftCntV because the
// x86 shift instructions read only the low bits of the xmm count register.
8031 instruct vshiftcnt(vecS dst, rRegI cnt) %{
8032 match(Set dst (LShiftCntV cnt));
8033 match(Set dst (RShiftCntV cnt));
8034 format %{ "movd $dst,$cnt\t! load shift count" %}
8035 ins_encode %{
8036 __ movdl($dst$$XMMRegister, $cnt$$Register);
8037 %}
8038 ins_pipe( pipe_slow );
8039 %}
8040
8041 // --------------------------------- Sqrt --------------------------------------
8042
8043 // Floating point vector sqrt
// Square root of packed 2 doubles, register source (AVX 128-bit form).
8044 instruct vsqrt2D_reg(vecX dst, vecX src) %{
8045 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8046 match(Set dst (SqrtVD src));
8047 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
8048 ins_encode %{
8049 int vector_len = 0;
8050 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
8051 %}
8052 ins_pipe( pipe_slow );
8053 %}
8054
8055 instruct vsqrt2D_mem(vecX dst, memory mem) %{
8056 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8057 match(Set dst (SqrtVD (LoadVector mem)));
8058 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
8059 ins_encode %{
8060 int vector_len = 0;
8178 match(Set dst (SqrtVF src));
8179 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
8180 ins_encode %{
8181 int vector_len = 2;
8182 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
8183 %}
8184 ins_pipe( pipe_slow );
8185 %}
8186
// Square root of packed 16 floats (512-bit EVEX form, vector_len = 2),
// source loaded directly from memory.
8187 instruct vsqrt16F_mem(vecZ dst, memory mem) %{
8188 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8189 match(Set dst (SqrtVF (LoadVector mem)));
8190 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
8191 ins_encode %{
8192 int vector_len = 2;
8193 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
8194 %}
8195 ins_pipe( pipe_slow );
8196 %}
8197
8198 // ------------------------------ LeftShift -----------------------------------
8199
8200 // Shorts/Chars vector left shift
// Left shift of packed shorts/chars. Variant naming convention used by every
// rule below: vsll<N>S shifts dst in place with an xmm count register
// (destructive SSE form, UseAVX == 0); *_imm takes an 8-bit immediate count;
// *_reg and *_reg_imm are the non-destructive three-operand AVX forms.
// vector_len selects the VEX/EVEX vector length (0/1/2 matching the X/Y/Z
// operand classes); the 32-element forms additionally require AVX512BW.
8201 instruct vsll2S(vecS dst, vecS shift) %{
8202 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8203 match(Set dst (LShiftVS dst shift));
8204 format %{ "psllw $dst,$shift\t! left shift packed2S" %}
8205 ins_encode %{
8206 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
8207 %}
8208 ins_pipe( pipe_slow );
8209 %}
8210
8211 instruct vsll2S_imm(vecS dst, immI8 shift) %{
8212 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8213 match(Set dst (LShiftVS dst shift));
8214 format %{ "psllw $dst,$shift\t! left shift packed2S" %}
8215 ins_encode %{
8216 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
8217 %}
8218 ins_pipe( pipe_slow );
8219 %}
8220
8221 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
8222 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8223 match(Set dst (LShiftVS src shift));
8224 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
8225 ins_encode %{
8226 int vector_len = 0;
8227 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8228 %}
8229 ins_pipe( pipe_slow );
8230 %}
8231
8232 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
8233 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8234 match(Set dst (LShiftVS src shift));
8235 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
8236 ins_encode %{
8237 int vector_len = 0;
8238 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8239 %}
8240 ins_pipe( pipe_slow );
8241 %}
8242
8243 instruct vsll4S(vecD dst, vecS shift) %{
8244 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8245 match(Set dst (LShiftVS dst shift));
8246 format %{ "psllw $dst,$shift\t! left shift packed4S" %}
8247 ins_encode %{
8248 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
8249 %}
8250 ins_pipe( pipe_slow );
8251 %}
8252
8253 instruct vsll4S_imm(vecD dst, immI8 shift) %{
8254 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8255 match(Set dst (LShiftVS dst shift));
8256 format %{ "psllw $dst,$shift\t! left shift packed4S" %}
8257 ins_encode %{
8258 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
8259 %}
8260 ins_pipe( pipe_slow );
8261 %}
8262
8263 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
8264 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8265 match(Set dst (LShiftVS src shift));
8266 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
8267 ins_encode %{
8268 int vector_len = 0;
8269 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8270 %}
8271 ins_pipe( pipe_slow );
8272 %}
8273
8274 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
8275 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8276 match(Set dst (LShiftVS src shift));
8277 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
8278 ins_encode %{
8279 int vector_len = 0;
8280 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8281 %}
8282 ins_pipe( pipe_slow );
8283 %}
8284
8285 instruct vsll8S(vecX dst, vecS shift) %{
8286 predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
8287 match(Set dst (LShiftVS dst shift));
8288 format %{ "psllw $dst,$shift\t! left shift packed8S" %}
8289 ins_encode %{
8290 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
8291 %}
8292 ins_pipe( pipe_slow );
8293 %}
8294
8295 instruct vsll8S_imm(vecX dst, immI8 shift) %{
8296 predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
8297 match(Set dst (LShiftVS dst shift));
8298 format %{ "psllw $dst,$shift\t! left shift packed8S" %}
8299 ins_encode %{
8300 __ psllw($dst$$XMMRegister, (int)$shift$$constant);
8301 %}
8302 ins_pipe( pipe_slow );
8303 %}
8304
8305 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
8306 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8307 match(Set dst (LShiftVS src shift));
8308 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
8309 ins_encode %{
8310 int vector_len = 0;
8311 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8312 %}
8313 ins_pipe( pipe_slow );
8314 %}
8315
8316 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
8317 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8318 match(Set dst (LShiftVS src shift));
8319 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
8320 ins_encode %{
8321 int vector_len = 0;
8322 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8323 %}
8324 ins_pipe( pipe_slow );
8325 %}
8326
8327 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
8328 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8329 match(Set dst (LShiftVS src shift));
8330 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
8331 ins_encode %{
8332 int vector_len = 1;
8333 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8334 %}
8335 ins_pipe( pipe_slow );
8336 %}
8337
8338 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
8339 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8340 match(Set dst (LShiftVS src shift));
8341 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
8342 ins_encode %{
8343 int vector_len = 1;
8344 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8345 %}
8346 ins_pipe( pipe_slow );
8347 %}
8348
// 512-bit word shifts require AVX512BW in addition to EVEX (UseAVX > 2).
8349 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
8350 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8351 match(Set dst (LShiftVS src shift));
8352 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
8353 ins_encode %{
8354 int vector_len = 2;
8355 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8356 %}
8357 ins_pipe( pipe_slow );
8358 %}
8359
8360 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8361 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8362 match(Set dst (LShiftVS src shift));
8363 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
8364 ins_encode %{
8365 int vector_len = 2;
8366 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8367 %}
8368 ins_pipe( pipe_slow );
8369 %}
8370
8371 // Integers vector left shift
// Left shift of packed ints: destructive SSE in-place forms (pslld) for
// UseAVX == 0, non-destructive three-operand AVX forms (vpslld) otherwise.
// *_imm variants take an 8-bit immediate count; vector_len 0/1/2 matches the
// X/Y/Z operand classes. The 8- and 16-element forms require UseAVX > 1 and
// UseAVX > 2 respectively.
8372 instruct vsll2I(vecD dst, vecS shift) %{
8373 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8374 match(Set dst (LShiftVI dst shift));
8375 format %{ "pslld $dst,$shift\t! left shift packed2I" %}
8376 ins_encode %{
8377 __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
8378 %}
8379 ins_pipe( pipe_slow );
8380 %}
8381
8382 instruct vsll2I_imm(vecD dst, immI8 shift) %{
8383 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8384 match(Set dst (LShiftVI dst shift));
8385 format %{ "pslld $dst,$shift\t! left shift packed2I" %}
8386 ins_encode %{
8387 __ pslld($dst$$XMMRegister, (int)$shift$$constant);
8388 %}
8389 ins_pipe( pipe_slow );
8390 %}
8391
8392 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
8393 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8394 match(Set dst (LShiftVI src shift));
8395 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
8396 ins_encode %{
8397 int vector_len = 0;
8398 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8399 %}
8400 ins_pipe( pipe_slow );
8401 %}
8402
8403 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
8404 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8405 match(Set dst (LShiftVI src shift));
8406 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
8407 ins_encode %{
8408 int vector_len = 0;
8409 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8410 %}
8411 ins_pipe( pipe_slow );
8412 %}
8413
8414 instruct vsll4I(vecX dst, vecS shift) %{
8415 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8416 match(Set dst (LShiftVI dst shift));
8417 format %{ "pslld $dst,$shift\t! left shift packed4I" %}
8418 ins_encode %{
8419 __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
8420 %}
8421 ins_pipe( pipe_slow );
8422 %}
8423
8424 instruct vsll4I_imm(vecX dst, immI8 shift) %{
8425 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8426 match(Set dst (LShiftVI dst shift));
8427 format %{ "pslld $dst,$shift\t! left shift packed4I" %}
8428 ins_encode %{
8429 __ pslld($dst$$XMMRegister, (int)$shift$$constant);
8430 %}
8431 ins_pipe( pipe_slow );
8432 %}
8433
8434 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
8435 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8436 match(Set dst (LShiftVI src shift));
8437 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
8438 ins_encode %{
8439 int vector_len = 0;
8440 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8441 %}
8442 ins_pipe( pipe_slow );
8443 %}
8444
8445 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
8446 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8447 match(Set dst (LShiftVI src shift));
8448 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
8449 ins_encode %{
8450 int vector_len = 0;
8451 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8452 %}
8453 ins_pipe( pipe_slow );
8454 %}
8455
8456 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
8457 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8458 match(Set dst (LShiftVI src shift));
8459 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
8460 ins_encode %{
8461 int vector_len = 1;
8462 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8463 %}
8464 ins_pipe( pipe_slow );
8465 %}
8466
8467 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
8468 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8469 match(Set dst (LShiftVI src shift));
8470 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
8471 ins_encode %{
8472 int vector_len = 1;
8473 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8474 %}
8475 ins_pipe( pipe_slow );
8476 %}
8477
8478 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
8479 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8480 match(Set dst (LShiftVI src shift));
8481 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
8482 ins_encode %{
8483 int vector_len = 2;
8484 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8485 %}
8486 ins_pipe( pipe_slow );
8487 %}
8488
8489 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8490 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8491 match(Set dst (LShiftVI src shift));
8492 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
8493 ins_encode %{
8494 int vector_len = 2;
8495 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8496 %}
8497 ins_pipe( pipe_slow );
8498 %}
8499
8500 // Longs vector left shift
// Left shift of packed longs: destructive SSE in-place forms (psllq) for
// UseAVX == 0 at 2 elements, non-destructive three-operand AVX forms
// (vpsllq) otherwise; vector_len 0/1/2 matches the X/Y/Z operand classes.
// 4- and 8-element forms require UseAVX > 1 and UseAVX > 2 respectively.
8501 instruct vsll2L(vecX dst, vecS shift) %{
8502 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8503 match(Set dst (LShiftVL dst shift));
8504 format %{ "psllq $dst,$shift\t! left shift packed2L" %}
8505 ins_encode %{
8506 __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
8507 %}
8508 ins_pipe( pipe_slow );
8509 %}
8510
8511 instruct vsll2L_imm(vecX dst, immI8 shift) %{
8512 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8513 match(Set dst (LShiftVL dst shift));
8514 format %{ "psllq $dst,$shift\t! left shift packed2L" %}
8515 ins_encode %{
8516 __ psllq($dst$$XMMRegister, (int)$shift$$constant);
8517 %}
8518 ins_pipe( pipe_slow );
8519 %}
8520
8521 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
8522 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8523 match(Set dst (LShiftVL src shift));
8524 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
8525 ins_encode %{
8526 int vector_len = 0;
8527 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8528 %}
8529 ins_pipe( pipe_slow );
8530 %}
8531
8532 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
8533 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8534 match(Set dst (LShiftVL src shift));
8535 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
8536 ins_encode %{
8537 int vector_len = 0;
8538 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8539 %}
8540 ins_pipe( pipe_slow );
8541 %}
8542
8543 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
8544 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8545 match(Set dst (LShiftVL src shift));
8546 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
8547 ins_encode %{
8548 int vector_len = 1;
8549 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8550 %}
8551 ins_pipe( pipe_slow );
8552 %}
8553
8554 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
8555 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8556 match(Set dst (LShiftVL src shift));
8557 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
8558 ins_encode %{
8559 int vector_len = 1;
8560 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8561 %}
8562 ins_pipe( pipe_slow );
8563 %}
8564
8565 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
8566 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8567 match(Set dst (LShiftVL src shift));
8568 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
8569 ins_encode %{
8570 int vector_len = 2;
8571 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8572 %}
8573 ins_pipe( pipe_slow );
8574 %}
8575
8576 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8577 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8578 match(Set dst (LShiftVL src shift));
8579 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
8580 ins_encode %{
8581 int vector_len = 2;
8582 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8583 %}
8584 ins_pipe( pipe_slow );
8585 %}
8586
8587 // ----------------------- LogicalRightShift -----------------------------------
8588
8589 // Shorts vector logical right shift produces incorrect Java result
8590 // for negative data because java code convert short value into int with
8591 // sign extension before a shift. But char vectors are fine since chars are
8592 // unsigned values.
8593
// Logical (unsigned) right shift of packed shorts/chars. Same variant
// pattern as the left-shift rules: in-place SSE forms (psrlw, UseAVX == 0),
// 8-bit-immediate *_imm forms, and three-operand AVX *_reg/*_reg_imm forms;
// vector_len 0/1/2 matches the X/Y/Z operand classes, and the 32-element
// (512-bit) forms additionally require AVX512BW. See the file comment above:
// only char data is correct here, since Java shorts are sign-extended first.
8594 instruct vsrl2S(vecS dst, vecS shift) %{
8595 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8596 match(Set dst (URShiftVS dst shift));
8597 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
8598 ins_encode %{
8599 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
8600 %}
8601 ins_pipe( pipe_slow );
8602 %}
8603
8604 instruct vsrl2S_imm(vecS dst, immI8 shift) %{
8605 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8606 match(Set dst (URShiftVS dst shift));
8607 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
8608 ins_encode %{
8609 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
8610 %}
8611 ins_pipe( pipe_slow );
8612 %}
8613
8614 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
8615 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8616 match(Set dst (URShiftVS src shift));
8617 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
8618 ins_encode %{
8619 int vector_len = 0;
8620 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8621 %}
8622 ins_pipe( pipe_slow );
8623 %}
8624
8625 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
8626 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8627 match(Set dst (URShiftVS src shift));
8628 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
8629 ins_encode %{
8630 int vector_len = 0;
8631 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8632 %}
8633 ins_pipe( pipe_slow );
8634 %}
8635
8636 instruct vsrl4S(vecD dst, vecS shift) %{
8637 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8638 match(Set dst (URShiftVS dst shift));
8639 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
8640 ins_encode %{
8641 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
8642 %}
8643 ins_pipe( pipe_slow );
8644 %}
8645
8646 instruct vsrl4S_imm(vecD dst, immI8 shift) %{
8647 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8648 match(Set dst (URShiftVS dst shift));
8649 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
8650 ins_encode %{
8651 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
8652 %}
8653 ins_pipe( pipe_slow );
8654 %}
8655
8656 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
8657 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8658 match(Set dst (URShiftVS src shift));
8659 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
8660 ins_encode %{
8661 int vector_len = 0;
8662 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8663 %}
8664 ins_pipe( pipe_slow );
8665 %}
8666
8667 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
8668 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8669 match(Set dst (URShiftVS src shift));
8670 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
8671 ins_encode %{
8672 int vector_len = 0;
8673 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8674 %}
8675 ins_pipe( pipe_slow );
8676 %}
8677
8678 instruct vsrl8S(vecX dst, vecS shift) %{
8679 predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
8680 match(Set dst (URShiftVS dst shift));
8681 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
8682 ins_encode %{
8683 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
8684 %}
8685 ins_pipe( pipe_slow );
8686 %}
8687
8688 instruct vsrl8S_imm(vecX dst, immI8 shift) %{
8689 predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
8690 match(Set dst (URShiftVS dst shift));
8691 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
8692 ins_encode %{
8693 __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
8694 %}
8695 ins_pipe( pipe_slow );
8696 %}
8697
8698 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
8699 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8700 match(Set dst (URShiftVS src shift));
8701 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
8702 ins_encode %{
8703 int vector_len = 0;
8704 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8705 %}
8706 ins_pipe( pipe_slow );
8707 %}
8708
8709 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
8710 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
8711 match(Set dst (URShiftVS src shift));
8712 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
8713 ins_encode %{
8714 int vector_len = 0;
8715 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8716 %}
8717 ins_pipe( pipe_slow );
8718 %}
8719
8720 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
8721 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8722 match(Set dst (URShiftVS src shift));
8723 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
8724 ins_encode %{
8725 int vector_len = 1;
8726 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8727 %}
8728 ins_pipe( pipe_slow );
8729 %}
8730
8731 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
8732 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8733 match(Set dst (URShiftVS src shift));
8734 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
8735 ins_encode %{
8736 int vector_len = 1;
8737 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8738 %}
8739 ins_pipe( pipe_slow );
8740 %}
8741
// 512-bit word shifts require AVX512BW in addition to EVEX (UseAVX > 2).
8742 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
8743 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8744 match(Set dst (URShiftVS src shift));
8745 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
8746 ins_encode %{
8747 int vector_len = 2;
8748 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8749 %}
8750 ins_pipe( pipe_slow );
8751 %}
8752
8753 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8754 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8755 match(Set dst (URShiftVS src shift));
8756 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
8757 ins_encode %{
8758 int vector_len = 2;
8759 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8760 %}
8761 ins_pipe( pipe_slow );
8762 %}
8763
8764 // Integers vector logical right shift
// Logical (unsigned) right shift of packed ints: destructive SSE in-place
// forms (psrld) for UseAVX == 0, non-destructive three-operand AVX forms
// (vpsrld) otherwise; *_imm variants take an 8-bit immediate count and
// vector_len 0/1/2 matches the X/Y/Z operand classes. 8- and 16-element
// forms require UseAVX > 1 and UseAVX > 2 respectively.
8765 instruct vsrl2I(vecD dst, vecS shift) %{
8766 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8767 match(Set dst (URShiftVI dst shift));
8768 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
8769 ins_encode %{
8770 __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
8771 %}
8772 ins_pipe( pipe_slow );
8773 %}
8774
8775 instruct vsrl2I_imm(vecD dst, immI8 shift) %{
8776 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8777 match(Set dst (URShiftVI dst shift));
8778 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
8779 ins_encode %{
8780 __ psrld($dst$$XMMRegister, (int)$shift$$constant);
8781 %}
8782 ins_pipe( pipe_slow );
8783 %}
8784
8785 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
8786 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8787 match(Set dst (URShiftVI src shift));
8788 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
8789 ins_encode %{
8790 int vector_len = 0;
8791 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8792 %}
8793 ins_pipe( pipe_slow );
8794 %}
8795
8796 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
8797 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8798 match(Set dst (URShiftVI src shift));
8799 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
8800 ins_encode %{
8801 int vector_len = 0;
8802 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8803 %}
8804 ins_pipe( pipe_slow );
8805 %}
8806
8807 instruct vsrl4I(vecX dst, vecS shift) %{
8808 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8809 match(Set dst (URShiftVI dst shift));
8810 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
8811 ins_encode %{
8812 __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
8813 %}
8814 ins_pipe( pipe_slow );
8815 %}
8816
8817 instruct vsrl4I_imm(vecX dst, immI8 shift) %{
8818 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
8819 match(Set dst (URShiftVI dst shift));
8820 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
8821 ins_encode %{
8822 __ psrld($dst$$XMMRegister, (int)$shift$$constant);
8823 %}
8824 ins_pipe( pipe_slow );
8825 %}
8826
8827 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
8828 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8829 match(Set dst (URShiftVI src shift));
8830 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
8831 ins_encode %{
8832 int vector_len = 0;
8833 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8834 %}
8835 ins_pipe( pipe_slow );
8836 %}
8837
8838 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
8839 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
8840 match(Set dst (URShiftVI src shift));
8841 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
8842 ins_encode %{
8843 int vector_len = 0;
8844 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8845 %}
8846 ins_pipe( pipe_slow );
8847 %}
8848
8849 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
8850 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8851 match(Set dst (URShiftVI src shift));
8852 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
8853 ins_encode %{
8854 int vector_len = 1;
8855 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8856 %}
8857 ins_pipe( pipe_slow );
8858 %}
8859
8860 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
8861 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8862 match(Set dst (URShiftVI src shift));
8863 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
8864 ins_encode %{
8865 int vector_len = 1;
8866 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8867 %}
8868 ins_pipe( pipe_slow );
8869 %}
8870
8871 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
8872 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8873 match(Set dst (URShiftVI src shift));
8874 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
8875 ins_encode %{
8876 int vector_len = 2;
8877 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8878 %}
8879 ins_pipe( pipe_slow );
8880 %}
8881
8882 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8883 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8884 match(Set dst (URShiftVI src shift));
8885 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
8886 ins_encode %{
8887 int vector_len = 2;
8888 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8889 %}
8890 ins_pipe( pipe_slow );
8891 %}
8892
8893 // Longs vector logical right shift
8894 instruct vsrl2L(vecX dst, vecS shift) %{
8895 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); // SSE2 destructive form
8896 match(Set dst (URShiftVL dst shift));
8897 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
8898 ins_encode %{
8899 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
8900 %}
8901 ins_pipe( pipe_slow );
8902 %}
8903
8904 instruct vsrl2L_imm(vecX dst, immI8 shift) %{
8905 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8906 match(Set dst (URShiftVL dst shift)); // 8-bit immediate shift count
8907 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
8908 ins_encode %{
8909 __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
8910 %}
8911 ins_pipe( pipe_slow );
8912 %}
8913
8914 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
8915 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8916 match(Set dst (URShiftVL src shift));
8917 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
8918 ins_encode %{
8919 int vector_len = 0; // 0 = 128-bit encoding (2 x 64-bit lanes)
8920 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8921 %}
8922 ins_pipe( pipe_slow );
8923 %}
8924
8925 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
8926 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8927 match(Set dst (URShiftVL src shift));
8928 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
8929 ins_encode %{
8930 int vector_len = 0;
8931 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8932 %}
8933 ins_pipe( pipe_slow );
8934 %}
8935
8936 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
8937 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); // 256-bit integer shift needs AVX2
8938 match(Set dst (URShiftVL src shift));
8939 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
8940 ins_encode %{
8941 int vector_len = 1; // 1 = 256-bit encoding
8942 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8943 %}
8944 ins_pipe( pipe_slow );
8945 %}
8946
8947 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
8948 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8949 match(Set dst (URShiftVL src shift));
8950 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
8951 ins_encode %{
8952 int vector_len = 1;
8953 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8954 %}
8955 ins_pipe( pipe_slow );
8956 %}
8957
8958 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
8959 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // 512-bit ops need AVX-512
8960 match(Set dst (URShiftVL src shift));
8961 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
8962 ins_encode %{
8963 int vector_len = 2; // 2 = 512-bit encoding
8964 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8965 %}
8966 ins_pipe( pipe_slow );
8967 %}
8968
8969 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
8970 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8971 match(Set dst (URShiftVL src shift));
8972 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
8973 ins_encode %{
8974 int vector_len = 2;
8975 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
8976 %}
8977 ins_pipe( pipe_slow );
8978 %}
8979
8980 // ------------------- ArithmeticRightShift -----------------------------------
8981
8982 // Shorts/Chars vector arithmetic right shift
8983 instruct vsra2S(vecS dst, vecS shift) %{
8984 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); // SSE destructive 2-operand form
8985 match(Set dst (RShiftVS dst shift));
8986 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
8987 ins_encode %{
8988 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
8989 %}
8990 ins_pipe( pipe_slow );
8991 %}
8992
8993 instruct vsra2S_imm(vecS dst, immI8 shift) %{
8994 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
8995 match(Set dst (RShiftVS dst shift)); // 8-bit immediate shift count
8996 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
8997 ins_encode %{
8998 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
8999 %}
9000 ins_pipe( pipe_slow );
9001 %}
9002
9003 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
9004 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); // AVX 3-operand non-destructive form
9005 match(Set dst (RShiftVS src shift));
9006 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9007 ins_encode %{
9008 int vector_len = 0; // 0 = 128-bit encoding
9009 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9010 %}
9011 ins_pipe( pipe_slow );
9012 %}
9013
9014 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
9015 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9016 match(Set dst (RShiftVS src shift));
9017 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
9018 ins_encode %{
9019 int vector_len = 0;
9020 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9021 %}
9022 ins_pipe( pipe_slow );
9023 %}
9024
9025 instruct vsra4S(vecD dst, vecS shift) %{
9026 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9027 match(Set dst (RShiftVS dst shift));
9028 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
9029 ins_encode %{
9030 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
9031 %}
9032 ins_pipe( pipe_slow );
9033 %}
9034
9035 instruct vsra4S_imm(vecD dst, immI8 shift) %{
9036 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9037 match(Set dst (RShiftVS dst shift));
9038 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
9039 ins_encode %{
9040 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
9041 %}
9042 ins_pipe( pipe_slow );
9043 %}
9044
9045 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
9046 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
9047 match(Set dst (RShiftVS src shift));
9048 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
9049 ins_encode %{
9050 int vector_len = 0;
9051 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9052 %}
9053 ins_pipe( pipe_slow );
9054 %}
9055
9056 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
9057 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
9058 match(Set dst (RShiftVS src shift));
9059 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
9060 ins_encode %{
9061 int vector_len = 0;
9062 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9063 %}
9064 ins_pipe( pipe_slow );
9065 %}
9066
9067 instruct vsra8S(vecX dst, vecS shift) %{
9068 predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
9069 match(Set dst (RShiftVS dst shift));
9070 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
9071 ins_encode %{
9072 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
9073 %}
9074 ins_pipe( pipe_slow );
9075 %}
9076
9077 instruct vsra8S_imm(vecX dst, immI8 shift) %{
9078 predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
9079 match(Set dst (RShiftVS dst shift));
9080 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
9081 ins_encode %{
9082 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
9083 %}
9084 ins_pipe( pipe_slow );
9085 %}
9086
9087 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
9088 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
9089 match(Set dst (RShiftVS src shift));
9090 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
9091 ins_encode %{
9092 int vector_len = 0;
9093 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9094 %}
9095 ins_pipe( pipe_slow );
9096 %}
9097
9098 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
9099 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
9100 match(Set dst (RShiftVS src shift));
9101 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
9102 ins_encode %{
9103 int vector_len = 0;
9104 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9105 %}
9106 ins_pipe( pipe_slow );
9107 %}
9108
9109 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
9110 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); // 256-bit integer ops need AVX2
9111 match(Set dst (RShiftVS src shift));
9112 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
9113 ins_encode %{
9114 int vector_len = 1; // 1 = 256-bit encoding
9115 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9116 %}
9117 ins_pipe( pipe_slow );
9118 %}
9119
9120 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
9121 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
9122 match(Set dst (RShiftVS src shift));
9123 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
9124 ins_encode %{
9125 int vector_len = 1;
9126 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9127 %}
9128 ins_pipe( pipe_slow );
9129 %}
9130
9131 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
9132 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); // 512-bit word ops need AVX512BW
9133 match(Set dst (RShiftVS src shift));
9134 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
9135 ins_encode %{
9136 int vector_len = 2; // 2 = 512-bit encoding
9137 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9138 %}
9139 ins_pipe( pipe_slow );
9140 %}
9141
9142 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
9143 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
9144 match(Set dst (RShiftVS src shift));
9145 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
9146 ins_encode %{
9147 int vector_len = 2;
9148 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9149 %}
9150 ins_pipe( pipe_slow );
9151 %}
9152
9153 // Integers vector arithmetic right shift
9154 instruct vsra2I(vecD dst, vecS shift) %{
9155 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); // SSE destructive 2-operand form
9156 match(Set dst (RShiftVI dst shift));
9157 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
9158 ins_encode %{
9159 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
9160 %}
9161 ins_pipe( pipe_slow );
9162 %}
9163
9164 instruct vsra2I_imm(vecD dst, immI8 shift) %{
9165 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
9166 match(Set dst (RShiftVI dst shift)); // 8-bit immediate shift count
9167 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
9168 ins_encode %{
9169 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
9170 %}
9171 ins_pipe( pipe_slow );
9172 %}
9173
9174 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
9175 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); // AVX 3-operand non-destructive form
9176 match(Set dst (RShiftVI src shift));
9177 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
9178 ins_encode %{
9179 int vector_len = 0; // 0 = 128-bit encoding
9180 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9181 %}
9182 ins_pipe( pipe_slow );
9183 %}
9184
9185 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
9186 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
9187 match(Set dst (RShiftVI src shift));
9188 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
9189 ins_encode %{
9190 int vector_len = 0;
9191 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9192 %}
9193 ins_pipe( pipe_slow );
9194 %}
9195
9196 instruct vsra4I(vecX dst, vecS shift) %{
9197 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9198 match(Set dst (RShiftVI dst shift));
9199 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
9200 ins_encode %{
9201 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
9202 %}
9203 ins_pipe( pipe_slow );
9204 %}
9205
9206 instruct vsra4I_imm(vecX dst, immI8 shift) %{
9207 predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
9208 match(Set dst (RShiftVI dst shift));
9209 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
9210 ins_encode %{
9211 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
9212 %}
9213 ins_pipe( pipe_slow );
9214 %}
9215
9216 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
9217 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
9218 match(Set dst (RShiftVI src shift));
9219 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
9220 ins_encode %{
9221 int vector_len = 0;
9222 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9223 %}
9224 ins_pipe( pipe_slow );
9225 %}
9226
9227 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
9228 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
9229 match(Set dst (RShiftVI src shift));
9230 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
9231 ins_encode %{
9232 int vector_len = 0;
9233 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9234 %}
9235 ins_pipe( pipe_slow );
9236 %}
9237
9238 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
9239 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); // 256-bit integer ops need AVX2
9240 match(Set dst (RShiftVI src shift));
9241 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
9242 ins_encode %{
9243 int vector_len = 1; // 1 = 256-bit encoding
9244 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9245 %}
9246 ins_pipe( pipe_slow );
9247 %}
9248
9249 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
9250 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
9251 match(Set dst (RShiftVI src shift));
9252 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
9253 ins_encode %{
9254 int vector_len = 1;
9255 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9256 %}
9257 ins_pipe( pipe_slow );
9258 %}
9259
9260 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
9261 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); // 512-bit ops need AVX-512
9262 match(Set dst (RShiftVI src shift));
9263 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
9264 ins_encode %{
9265 int vector_len = 2; // 2 = 512-bit encoding
9266 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9267 %}
9268 ins_pipe( pipe_slow );
9269 %}
9270
9271 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
9272 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
9273 match(Set dst (RShiftVI src shift));
9274 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
9275 ins_encode %{
9276 int vector_len = 2;
9277 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
9278 %}
9279 ins_pipe( pipe_slow );
9280 %}
9281
9282 // There are no longs vector arithmetic right shift instructions.
9283
9284
9285 // --------------------------------- AND --------------------------------------
9286
9287 instruct vand4B(vecS dst, vecS src) %{
9288 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4); // SSE destructive 2-operand form; AND is element-size agnostic, so matching is by byte length
9289 match(Set dst (AndV dst src));
9290 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
9291 ins_encode %{
9292 __ pand($dst$$XMMRegister, $src$$XMMRegister);
9293 %}
9294 ins_pipe( pipe_slow );
9295 %}
9296
9297 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
9298 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); // AVX 3-operand non-destructive form
9299 match(Set dst (AndV src1 src2));
9300 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
9301 ins_encode %{
9302 int vector_len = 0; // 0 = 128-bit encoding
9303 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
9304 %}
9305 ins_pipe( pipe_slow );
9306 %}
9307
9308 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
9309 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
9310 match(Set dst (AndV src (LoadVector mem))); // fold the vector load into the AND's memory operand
9311 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
9312 ins_encode %{
9313 int vector_len = 0;
9314 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
9315 %}
9316 ins_pipe( pipe_slow );
9317 %}
9318
9319 instruct vand8B(vecD dst, vecD src) %{
9320 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
9321 match(Set dst (AndV dst src));
9322 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
9323 ins_encode %{
9324 __ pand($dst$$XMMRegister, $src$$XMMRegister);
9325 %}
9326 ins_pipe( pipe_slow );
9327 %}
9328
9329 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
9330 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
9331 match(Set dst (AndV src1 src2));
9332 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
9333 ins_encode %{
9334 int vector_len = 0;
9335 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
9336 %}
9337 ins_pipe( pipe_slow );
9338 %}
9339
9340 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
9341 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
9342 match(Set dst (AndV src (LoadVector mem)));
9343 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
9344 ins_encode %{
9345 int vector_len = 0;
9346 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
9347 %}
9348 ins_pipe( pipe_slow );
9349 %}
9350
9351 instruct vand16B(vecX dst, vecX src) %{
9352 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
9353 match(Set dst (AndV dst src));
9354 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
9355 ins_encode %{
9356 __ pand($dst$$XMMRegister, $src$$XMMRegister);
9357 %}
9358 ins_pipe( pipe_slow );
9359 %}
9360
9361 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
9362 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
9363 match(Set dst (AndV src1 src2));
9364 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
9365 ins_encode %{
9366 int vector_len = 0;
9687 %}
9688
9689 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
9690 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); // 512-bit XOR needs AVX-512
9691 match(Set dst (XorV src1 src2));
9692 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
9693 ins_encode %{
9694 int vector_len = 2; // 2 = 512-bit encoding
9695 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
9696 %}
9697 ins_pipe( pipe_slow );
9698 %}
9699
9700 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
9701 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
9702 match(Set dst (XorV src (LoadVector mem))); // fold the vector load into the XOR's memory operand
9703 format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
9704 ins_encode %{
9705 int vector_len = 2;
9706 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
9707 %}
9708 ins_pipe( pipe_slow );
9709 %}
9710
9711 // --------------------------------- FMA --------------------------------------
9712
9713 // a * b + c
9714 instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
9715 predicate(UseFMA && n->as_Vector()->length() == 2);
9716 match(Set c (FmaVD c (Binary a b))); // fused multiply-add: result overwrites the addend register c
9717 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
9718 ins_cost(150);
9719 ins_encode %{
9720 int vector_len = 0; // 0 = 128-bit encoding (2 x double)
9721 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
9722 %}
9723 ins_pipe( pipe_slow );
9724 %}
9725
9726 // a * b + c
|
1355 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
1356 __ end_a_stub();
1357 return offset;
1358 }
1359
1360
1361 //=============================================================================
1362
1363 // Float masks come from different places depending on platform.
1364 #ifdef _LP64
1365 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } // 64-bit: masks live in generated stub constants
1366 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
1367 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1368 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1369 #else
1370 static address float_signmask() { return (address)float_signmask_pool; } // 32-bit: masks come from in-image constant pools
1371 static address float_signflip() { return (address)float_signflip_pool; }
1372 static address double_signmask() { return (address)double_signmask_pool; }
1373 static address double_signflip() { return (address)double_signflip_pool; }
1374 #endif
1375 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } // 0x00ff repeated; used to truncate words back to bytes after widening byte multiplies
1376 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
1377 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
1378
1379 //=============================================================================
// Filter match rules that exist in the AD file but cannot be used with the
// current CPU features / -XX:UseSSE / -XX:UseAVX settings.  Only the opcodes
// listed below are ever vetoed; everything else defaults to supported.
1380 const bool Matcher::match_rule_supported(int opcode) {
1381   if (!has_match_rule(opcode))
1382     return false;
1383
1384   bool ret_value = true;
1385   switch (opcode) {
1386     case Op_AbsVL:
1387       if (UseAVX < 3)
1388         ret_value = false;
      break; // FIX: this break was missing, so Op_AbsVL fell through into the
             // PopCount cases below and AbsVL was wrongly disabled whenever
             // UsePopCountInstruction is off, even on AVX-512 hardware.
1389     case Op_PopCountI:
1390     case Op_PopCountL:
1391       if (!UsePopCountInstruction)
1392         ret_value = false;
1393       break;
1394     case Op_PopCountVI:
1395       if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
1396         ret_value = false;
1397       break;
1398     case Op_MulVI:
1399       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1400         ret_value = false;
1401       break;
1402     case Op_MulVL:
1403     case Op_MulReductionVL:
1404       if (VM_Version::supports_avx512dq() == false) // 64-bit lane multiply needs AVX512DQ (vpmullq)
1405         ret_value = false;
1406       break;
1407     case Op_AddReductionVL:
1408       if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
1409         ret_value = false;
1410       break;
1411     case Op_AbsVB:
1412     case Op_AbsVS:
1413     case Op_AbsVI:
1414     case Op_AddReductionVI:
1415       if (UseSSE < 3) // requires at least SSE3
1416         ret_value = false;
1417       break;
1418     case Op_MulReductionVI:
1419       if (UseSSE < 4) // requires at least SSE4
1420         ret_value = false;
1421       break;
1422     case Op_AddReductionVF:
1423     case Op_AddReductionVD:
1424     case Op_MulReductionVF:
1425     case Op_MulReductionVD:
1426       if (UseSSE < 1) // requires at least SSE
1427         ret_value = false;
1428       break;
1429     case Op_SqrtVD:
1430     case Op_SqrtVF:
1431       if (UseAVX < 1) // enabled for AVX only
1432         ret_value = false;
1433       break;
      // NOTE(review): file lines 1434-1438 (the case labels guarded by the
      // condition below) are not visible in this excerpt.
1439         ret_value = false;
1440       break;
1441     case Op_CMoveVF:
1442     case Op_CMoveVD:
1443       if (UseAVX < 1 || UseAVX > 2) // vector cmove implemented with AVX/AVX2 blends only
1444         ret_value = false;
1445       break;
1446     case Op_StrIndexOf:
1447       if (!UseSSE42Intrinsics)
1448         ret_value = false;
1449       break;
1450     case Op_StrIndexOfChar:
1451       if (!UseSSE42Intrinsics)
1452         ret_value = false;
1453       break;
1454     case Op_OnSpinWait:
1455       if (VM_Version::supports_on_spin_wait() == false)
1456         ret_value = false;
1457       break;
1458     case Op_MulAddVS2VI:
1459     case Op_RShiftVL:
1460     case Op_AbsVD:
1461     case Op_NegVD:
1462       if (UseSSE < 2)
1463         ret_value = false;
1464       break;
1465     case Op_MulVB:
1466     case Op_LShiftVB:
1467     case Op_RShiftVB:
1468     case Op_URShiftVB:
1469       if (UseSSE < 4) // byte mul/shift sequences use pmovsxbw etc. (SSE4.1)
1470         ret_value = false;
1471       break;
1472 #ifdef _LP64
1473     case Op_MaxD:
1474     case Op_MaxF:
1475     case Op_MinD:
1476     case Op_MinF:
1477       if (UseAVX < 1) // enabled for AVX only
1478         ret_value = false;
1479       break;
1480 #endif
1481   }
1482
1483   return ret_value; // Per default match rules are supported.
1484 }
1485
1486 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
1487   // identify extra cases that we might want to provide match rules for
1488   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
1489   bool ret_value = match_rule_supported(opcode); // start from the vlen-independent check
1490   if (ret_value) {
1491     switch (opcode) {
1492       case Op_AbsVB:
1493       case Op_AddVB:
1494       case Op_SubVB:
1495         if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) // 64 bytes = 512-bit byte vector: needs AVX512BW
1496           ret_value = false;
1497         break;
1498       case Op_AbsVS:
1499       case Op_AddVS:
1500       case Op_SubVS:
1501       case Op_MulVS:
1502       case Op_LShiftVS:
1503       case Op_RShiftVS:
1504       case Op_URShiftVS:
1505         if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) // 32 shorts = 512-bit word vector: needs AVX512BW
1506           ret_value = false;
1507         break;
1508       case Op_MulVB:
1509       case Op_LShiftVB:
1510       case Op_RShiftVB:
1511       case Op_URShiftVB:
1512         if ((vlen == 32 && UseAVX < 2) ||
1513             ((vlen == 64) && (VM_Version::supports_avx512bw() == false)))
1514           ret_value = false;
1515         break;
1516       case Op_NegVF:
1517         if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) // 16 floats = 512-bit: sign-flip needs AVX512DQ
1518           ret_value = false;
1519         break;
1520       case Op_CMoveVF:
1521         if (vlen != 8) // float cmove only implemented for 8-element (256-bit) vectors
1522           ret_value = false;
1523         break;
1524       case Op_NegVD:
1525         if ((vlen == 8) && (VM_Version::supports_avx512dq() == false))
1526           ret_value = false;
1527         break;
1528       case Op_CMoveVD:
1529         if (vlen != 4) // double cmove only implemented for 4-element (256-bit) vectors
1530           ret_value = false;
1531         break;
1532     }
1533   }
1534
1535   return ret_value; // Per default match rules are supported.
1536 }
1537
1538 const bool Matcher::has_predicated_vectors(void) {
1539   // Predicated (masked) vector operations require the EVEX encoding
1540   // (UseAVX > 2) together with the AVX-512VL extension, which extends
1541   // masking to sub-512-bit vector lengths.
1542   return (UseAVX > 2) && VM_Version::supports_avx512vl();
1543 }
1546
1547 const int Matcher::float_pressure(int default_pressure_threshold) {
7322 ins_encode %{
7323 int vector_len = 2;
7324 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7325 %}
7326 ins_pipe( pipe_slow );
7327 %}
7328
7329 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
7330 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); // 512-bit ops need AVX-512
7331 match(Set dst (SubVD src (LoadVector mem))); // fold the vector load into the subtract's memory operand
7332 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
7333 ins_encode %{
7334 int vector_len = 2; // 2 = 512-bit encoding
7335 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7336 %}
7337 ins_pipe( pipe_slow );
7338 %}
7339
7340 // --------------------------------- MUL --------------------------------------
7341
7342 // Byte vector mul
7343 // x86 has no byte-lane multiply, so: widen bytes to words (pmovsxbw),
7344 // multiply words, mask each result word to its low byte (0x00ff mask),
7345 // then pack back down to bytes.
7343 instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp, rRegI scratch) %{
7344 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); // pmovsxbw requires SSE4.1
7345 match(Set dst (MulVB src1 src2));
7346 effect(TEMP dst, TEMP tmp, TEMP scratch); // scratch GPR materializes the mask address
7347 format %{"pmovsxbw $tmp,$src1\n\t"
7348 "pmovsxbw $dst,$src2\n\t"
7349 "pmullw $tmp,$dst\n\t"
7350 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
7351 "pand $dst,$tmp\n\t"
7352 "packuswb $dst,$dst\t! mul packed4B" %}
7353 ins_encode %{
7354 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
7355 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
7356 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
7357 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
7358 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
7359 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
7360 %}
7361 ins_pipe( pipe_slow );
7362 %}
7363
7364 // Same widen-multiply-mask-pack sequence as mul4B_reg, for 8 bytes.
7364 instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp, rRegI scratch) %{
7365 predicate(UseSSE > 3 && n->as_Vector()->length() == 8); // pmovsxbw requires SSE4.1
7366 match(Set dst (MulVB src1 src2));
7367 effect(TEMP dst, TEMP tmp, TEMP scratch);
7368 format %{"pmovsxbw $tmp,$src1\n\t"
7369 "pmovsxbw $dst,$src2\n\t"
7370 "pmullw $tmp,$dst\n\t"
7371 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
7372 "pand $dst,$tmp\n\t"
7373 "packuswb $dst,$dst\t! mul packed8B" %}
7374 ins_encode %{
7375 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
7376 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
7377 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
7378 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
7379 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
7380 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
7381 %}
7382 ins_pipe( pipe_slow );
7383 %}
7384
7385 // 16 bytes don't fit in one word vector after widening, so multiply the
7386 // low and high 8-byte halves separately and pack the two word results.
7385 instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2, rRegI scratch) %{
7386 predicate(UseSSE > 3 && n->as_Vector()->length() == 16); // pmovsxbw/pshufd require SSE4.1/SSE2
7387 match(Set dst (MulVB src1 src2));
7388 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
7389 format %{"pmovsxbw $tmp1,$src1\n\t"
7390 "pmovsxbw $tmp2,$src2\n\t"
7391 "pmullw $tmp1,$tmp2\n\t"
7392 "pshufd $tmp2,$src1,0xEE\n\t"
7393 "pshufd $dst,$src2,0xEE\n\t"
7394 "pmovsxbw $tmp2,$tmp2\n\t"
7395 "pmovsxbw $dst,$dst\n\t"
7396 "pmullw $tmp2,$dst\n\t"
7397 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
7398 "pand $tmp2,$dst\n\t"
7399 "pand $dst,$tmp1\n\t"
7400 "packuswb $dst,$tmp2\t! mul packed16B" %}
7401 ins_encode %{
7402 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); // widen + multiply low 8 bytes
7403 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
7404 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister);
7405 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); // 0xEE moves the high quadword down
7406 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE);
7407 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); // widen + multiply high 8 bytes
7408 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
7409 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister);
7410 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
7411 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); // keep low byte of each word
7412 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
7413 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); // pack both halves back to 16 bytes
7414 %}
7415 ins_pipe( pipe_slow );
7416 %}
7417
7418 // AVX2 variant: one 256-bit widen/multiply covers all 16 bytes at once.
7418 instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp, rRegI scratch) %{
7419 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7420 match(Set dst (MulVB src1 src2));
7421 effect(TEMP dst, TEMP tmp, TEMP scratch);
7422 format %{"vpmovsxbw $tmp,$src1\n\t"
7423 "vpmovsxbw $dst,$src2\n\t"
7424 "vpmullw $tmp,$tmp,$dst\n\t"
7425 "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
7426 "vpand $dst,$dst,$tmp\n\t"
7427 "vextracti128_high $tmp,$dst\n\t"
7428 "vpackuswb $dst,$dst,$dst\n\t! mul packed16B" %}
7429 ins_encode %{
7430 int vector_len = 1; // 256-bit ops: 16 bytes widen to 16 words
7431 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
7432 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
7433 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len);
7434 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
7435 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
7436 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
7437 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); // NOTE(review): format above prints $dst,$dst,$dst but the encoding packs $dst with $tmp; format text affects only disassembly output — confirm against upstream
7438 %}
7439 ins_pipe( pipe_slow );
7440 %}
7441
// 32-byte (vecY) MulVB under AVX2.  Each 16-byte half is sign-extended to
// words, multiplied with vpmullw, masked back to the low byte of each word
// (mask broadcast from vector_short_to_byte_mask), packed, and the 64-bit
// lanes re-ordered with vpermq 0xD8 to undo the interleave from vpackuswb.
7442 instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, rRegI scratch) %{
7443 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
7444 match(Set dst (MulVB src1 src2));
7445 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
7446 format %{"vextracti128_high $tmp1,$src1\n\t"
7447 "vextracti128_high $dst,$src2\n\t"
7448 "vpmovsxbw $tmp1,$tmp1\n\t"
7449 "vpmovsxbw $dst,$dst\n\t"
7450 "vpmullw $tmp1,$tmp1,$dst\n\t"
7451 "vpmovsxbw $tmp2,$src1\n\t"
7452 "vpmovsxbw $dst,$src2\n\t"
7453 "vpmullw $tmp2,$tmp2,$dst\n\t"
7454 "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t"
7455 "vpbroadcastd $dst, $dst\n\t"
7456 "vpand $tmp1,$tmp1,$dst\n\t"
7457 "vpand $dst,$dst,$tmp2\n\t"
7458 "vpackuswb $dst,$dst,$tmp1\n\t"
7459 "vpermq $dst, $dst, 0xD8\t! mul packed32B" %}
7460 ins_encode %{
7461 int vector_len = 1;
7462 // High halves first: extract, widen, multiply into $tmp1.
7463 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
7464 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
7465 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
7466 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
7467 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
7468 // Low halves: widen directly from the sources, multiply into $tmp2.
7469 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
7470 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
7471 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
7472 // Broadcast the 0x00ff00ff word->byte mask and keep only the low bytes.
7473 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
7474 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
7475 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
7476 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
7477 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len);
7478 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
7479 %}
7480 ins_pipe( pipe_slow );
7481 %}
7479
// 64-byte (vecZ) MulVB under AVX-512.  Same widen/multiply/mask/pack scheme
// as the 32B rule, but the final lane re-order uses a table permute
// (vpermq with a permutation constant from vector_byte_perm_mask) instead
// of an immediate shuffle.
7480 instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
7481 predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
7482 match(Set dst (MulVB src1 src2));
7483 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
7484 format %{"vextracti64x4_high $tmp1,$src1\n\t"
7485 "vextracti64x4_high $dst,$src2\n\t"
7486 "vpmovsxbw $tmp1,$tmp1\n\t"
7487 "vpmovsxbw $dst,$dst\n\t"
7488 "vpmullw $tmp1,$tmp1,$dst\n\t"
7489 "vpmovsxbw $tmp2,$src1\n\t"
7490 "vpmovsxbw $dst,$src2\n\t"
7491 "vpmullw $tmp2,$tmp2,$dst\n\t"
7492 "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t"
7493 "vpbroadcastd $dst, $dst\n\t"
7494 "vpand $tmp1,$tmp1,$dst\n\t"
7495 "vpand $tmp2,$tmp2,$dst\n\t"
7496 "vpackuswb $dst,$tmp1,$tmp2\n\t"
7497 "evmovdquq $tmp2,[0x0604020007050301]\n\t"
7498 "vpermq $dst,$tmp2,$dst,0x01\t! mul packed64B" %}
7499
7500 ins_encode %{
7501 int vector_len = 2;
7502 // High 32 bytes of each source: extract, widen, multiply into $tmp1.
7503 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
7504 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
7505 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
7506 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
7507 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
7508 // Low 32 bytes: widen directly, multiply into $tmp2.
7509 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
7510 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
7511 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
7512 // Broadcast word->byte mask, keep low bytes of both halves, pack.
7513 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
7514 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
7515 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
7516 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
7517 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
7518 // NOTE(review): the format line shows "vpermq ...,0x01" but the encode
7519 // emits the three-operand table permute with no immediate — confirm the
7520 // format text against the assembler signature.
7521 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
7522 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
7523
7524 %}
7525 ins_pipe( pipe_slow );
7526 %}
7521
7522 // Shorts/Chars vector mul
// 2-short (vecS) MulVS, SSE only (UseAVX == 0): in-place pmullw, dst is
// also the first source (two-operand SSE form).
7523 instruct vmul2S(vecS dst, vecS src) %{
7524 predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
7525 match(Set dst (MulVS dst src));
7526 format %{ "pmullw $dst,$src\t! mul packed2S" %}
7527 ins_encode %{
7528 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
7529 %}
7530 ins_pipe( pipe_slow );
7531 %}
7532
7533 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
7534 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7535 match(Set dst (MulVS src1 src2));
7536 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
7537 ins_encode %{
7538 int vector_len = 0;
7539 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7540 %}
7541 ins_pipe( pipe_slow );
8224 match(Set dst (DivVD src1 src2));
8225 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
8226 ins_encode %{
8227 int vector_len = 2;
8228 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8229 %}
8230 ins_pipe( pipe_slow );
8231 %}
8232
// 8-double (vecZ) DivVD with a memory operand folded into the divide
// (src / load) under AVX-512 (vector_len 2 = 512-bit).
8233 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
8234 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8235 match(Set dst (DivVD src (LoadVector mem)));
8236 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
8237 ins_encode %{
8238 int vector_len = 2;
8239 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8240 %}
8241 ins_pipe( pipe_slow );
8242 %}
8243
8244 // --------------------------------- Sqrt --------------------------------------
8245
8246 // Floating point vector sqrt
// 2-double (vecX) SqrtVD, register form, AVX (vector_len 0 = 128-bit).
8247 instruct vsqrt2D_reg(vecX dst, vecX src) %{
8248 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8249 match(Set dst (SqrtVD src));
8250 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
8251 ins_encode %{
8252 int vector_len = 0;
8253 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
8254 %}
8255 ins_pipe( pipe_slow );
8256 %}
8257
8258 instruct vsqrt2D_mem(vecX dst, memory mem) %{
8259 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
8260 match(Set dst (SqrtVD (LoadVector mem)));
8261 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
8262 ins_encode %{
8263 int vector_len = 0;
8381 match(Set dst (SqrtVF src));
8382 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
8383 ins_encode %{
8384 int vector_len = 2;
8385 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
8386 %}
8387 ins_pipe( pipe_slow );
8388 %}
8389
// 16-float (vecZ) SqrtVF with the load folded into the sqrt, AVX-512.
8390 instruct vsqrt16F_mem(vecZ dst, memory mem) %{
8391 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8392 match(Set dst (SqrtVF (LoadVector mem)));
8393 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
8394 ins_encode %{
8395 int vector_len = 2;
8396 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
8397 %}
8398 ins_pipe( pipe_slow );
8399 %}
8400
8401 // ------------------------------ Shift ---------------------------------------
8402
8403 // Left and right shift count vectors are the same on x86
8404 // (only lowest bits of xmm reg are used for count).
// Materialize a vector shift count from a GP register.  The same rule
// serves both LShiftCntV and RShiftCntV (only the low bits of the xmm
// register are consumed by the shift instructions).
8405 instruct vshiftcnt(vecS dst, rRegI cnt) %{
8406 match(Set dst (LShiftCntV cnt));
8407 match(Set dst (RShiftCntV cnt));
8408 format %{ "movdl $dst,$cnt\t! load shift count" %}
8409 ins_encode %{
8410 __ movdl($dst$$XMMRegister, $cnt$$Register);
8411 %}
8412 ins_pipe( pipe_slow );
8413 %}
8414
// Materialize an immediate shift count: stage the constant in a GP temp,
// then move it into the xmm destination.
8415 instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{
8416 match(Set dst cnt);
8417 effect(TEMP tmp);
8418 format %{ "movl $tmp,$cnt\t"
8419 "movdl $dst,$tmp\t! load shift count" %}
8420 ins_encode %{
8421 __ movl($tmp$$Register, $cnt$$constant);
8422 __ movdl($dst$$XMMRegister, $tmp$$Register);
8423 %}
8424 ins_pipe( pipe_slow );
8425 %}
8426
8427 // Byte vector shift
// 4-byte shift (left / arithmetic right / logical right, selected by the
// ideal opcode at encode time).  No byte-granular shift exists on x86, so
// bytes are widened to words (vextendbw), shifted, masked to the low byte
// of each word, and packed back.
8428 instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{
8429 predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
8430 match(Set dst (LShiftVB src shift));
8431 match(Set dst (RShiftVB src shift));
8432 match(Set dst (URShiftVB src shift));
8433 effect(TEMP dst, TEMP tmp, TEMP scratch);
8434 format %{"vextendbw $tmp,$src\n\t"
8435 "vshiftw $tmp,$shift\n\t"
8436 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
8437 "pand $dst,$tmp\n\t"
8438 "packuswb $dst,$dst\n\t ! packed4B shift" %}
8439 ins_encode %{
8440 // Ideal opcode selects sign/zero extension and the shift direction.
8441 int opcode = this->as_Mach()->ideal_Opcode();
8442
8443 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
8444 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
8445 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
8446 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
8447 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
8448 %}
8449 ins_pipe( pipe_slow );
8450 %}
8450
// 8-byte shift — same widen/shift/mask/pack scheme as vshift4B, on a
// 64-bit (vecD) vector.
8451 instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{
8452 predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
8453 match(Set dst (LShiftVB src shift));
8454 match(Set dst (RShiftVB src shift));
8455 match(Set dst (URShiftVB src shift));
8456 effect(TEMP dst, TEMP tmp, TEMP scratch);
8457 format %{"vextendbw $tmp,$src\n\t"
8458 "vshiftw $tmp,$shift\n\t"
8459 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
8460 "pand $dst,$tmp\n\t"
8461 "packuswb $dst,$dst\n\t ! packed8B shift" %}
8462 ins_encode %{
8463 // Ideal opcode selects sign/zero extension and the shift direction.
8464 int opcode = this->as_Mach()->ideal_Opcode();
8465
8466 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
8467 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
8468 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
8469 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
8470 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
8471 %}
8472 ins_pipe( pipe_slow );
8473 %}
8473
// 16-byte shift, SSE4 path (UseAVX <= 1): the vector is processed in two
// 8-byte halves (pshufd 0xE brings the high half down), each widened to
// words and shifted, then both are masked and packed back together.
8474 instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{
8475 predicate(UseSSE > 3 && UseAVX <= 1 && n->as_Vector()->length() == 16);
8476 match(Set dst (LShiftVB src shift));
8477 match(Set dst (RShiftVB src shift));
8478 match(Set dst (URShiftVB src shift));
8479 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
8480 format %{"vextendbw $tmp1,$src\n\t"
8481 "vshiftw $tmp1,$shift\n\t"
8482 "pshufd $tmp2,$src\n\t"
8483 "vextendbw $tmp2,$tmp2\n\t"
8484 "vshiftw $tmp2,$shift\n\t"
8485 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
8486 "pand $tmp2,$dst\n\t"
8487 "pand $dst,$tmp1\n\t"
8488 "packuswb $dst,$tmp2\n\t! packed16B shift" %}
8489 ins_encode %{
8490 // Ideal opcode selects sign/zero extension and the shift direction.
8491 int opcode = this->as_Mach()->ideal_Opcode();
8492
8493 __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
8494 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
8495 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
8496 __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
8497 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
8498 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
8499 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
8500 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
8501 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
8502 %}
8503 ins_pipe( pipe_slow );
8504 %}
8504
// 16-byte shift, AVX2 path: widen the full 16 bytes to a 256-bit word
// vector in one vextendbw, shift, mask, then pack the two 128-bit halves
// back into the destination.
8505 instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
8506 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8507 match(Set dst (LShiftVB src shift));
8508 match(Set dst (RShiftVB src shift));
8509 match(Set dst (URShiftVB src shift));
8510 effect(TEMP dst, TEMP tmp, TEMP scratch);
8511 format %{"vextendbw $tmp,$src\n\t"
8512 "vshiftw $tmp,$tmp,$shift\n\t"
8513 "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t"
8514 "vextracti128_high $dst,$tmp\n\t"
8515 "vpackuswb $dst,$tmp,$dst\n\t! packed16B shift" %}
8516 ins_encode %{
8517 // Ideal opcode selects sign/zero extension and the shift direction.
8518 int opcode = this->as_Mach()->ideal_Opcode();
8519
8520 int vector_len = 1;
8521 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
8522 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
8523 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
8524 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
8525 // Final pack runs at 128-bit width (vector_len 0).
8526 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
8527 %}
8528 ins_pipe( pipe_slow );
8529 %}
8528
// 32-byte shift, AVX2: process the two 16-byte halves as word vectors,
// shift and mask each, pack, then vpermq 0xD8 restores lane order after
// the lane-wise pack.
8529 instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
8530 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
8531 match(Set dst (LShiftVB src shift));
8532 match(Set dst (RShiftVB src shift));
8533 match(Set dst (URShiftVB src shift));
8534 effect(TEMP dst, TEMP tmp, TEMP scratch);
8535 format %{"vextracti128_high $tmp,$src\n\t"
8536 "vextendbw $tmp,$tmp\n\t"
8537 "vextendbw $dst,$src\n\t"
8538 "vshiftw $tmp,$tmp,$shift\n\t"
8539 "vshiftw $dst,$dst,$shift\n\t"
8540 "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t"
8541 "vpand $dst,$dst,[0x00ff00ff0x00ff00ff]\n\t"
8542 "vpackuswb $dst,$dst,$tmp\n\t"
8543 "vpermq $dst,$dst,0xD8\n\t! packed32B shift" %}
8544 ins_encode %{
8545 // Ideal opcode selects sign/zero extension and the shift direction.
8546 int opcode = this->as_Mach()->ideal_Opcode();
8547
8548 int vector_len = 1;
8549 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
8550 __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
8551 __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
8552 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
8553 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
8554 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
8555 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
8556 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
8557 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
8558 %}
8559 ins_pipe( pipe_slow );
8560 %}
8560
// 64-byte shift, AVX-512: widen/shift both 32-byte halves as word vectors,
// mask with a broadcast 0x00ff00ff constant, pack, and restore lane order
// with a table vpermq (permutation loaded from vector_byte_perm_mask).
8561 instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
8562 predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
8563 match(Set dst (LShiftVB src shift));
8564 match(Set dst (RShiftVB src shift));
8565 match(Set dst (URShiftVB src shift));
8566 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
8567 format %{"vextracti64x4 $tmp1,$src\n\t"
8568 "vextendbw $tmp1,$tmp1\n\t"
8569 "vextendbw $tmp2,$src\n\t"
8570 "vshiftw $tmp1,$tmp1,$shift\n\t"
8571 "vshiftw $tmp2,$tmp2,$shift\n\t"
8572 "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
8573 "vpbroadcastd $dst,$dst\n\t"
8574 "vpand $tmp1,$tmp1,$dst\n\t"
8575 "vpand $tmp2,$tmp2,$dst\n\t"
8576 "vpackuswb $dst,$tmp1,$tmp2\n\t"
8577 "evmovdquq $tmp2, [0x0604020007050301]\n\t"
8578 "vpermq $dst,$tmp2,$dst\n\t! packed64B shift" %}
8579 ins_encode %{
8580 // Ideal opcode selects sign/zero extension and the shift direction.
8581 int opcode = this->as_Mach()->ideal_Opcode();
8582
8583 int vector_len = 2;
8584 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
8585 __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
8586 __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
8587 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
8588 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
8589 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
8590 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
8591 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
8592 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
8593 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
8594 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
8595 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
8596 %}
8597 ins_pipe( pipe_slow );
8598 %}
8598
8599 // Shorts vector logical right shift produces incorrect Java result
8600 // for negative data because java code convert short value into int with
8601 // sign extension before a shift. But char vectors are fine since chars are
8602 // unsigned values.
8603 // Shorts/Chars vector left shift
// 2-short shift (all three shift kinds).  SSE path copies src into dst
// first (two-operand form); AVX path uses the three-operand form.
// NOTE(review): the instruct name contains a typo ("vshist" vs "vshift");
// renaming would change the ADLC-generated identifier, so it is only
// flagged here.
8604 instruct vshist2S(vecS dst, vecS src, vecS shift) %{
8605 predicate(n->as_Vector()->length() == 2);
8606 match(Set dst (LShiftVS src shift));
8607 match(Set dst (RShiftVS src shift));
8608 match(Set dst (URShiftVS src shift));
8609 format %{ "vshiftw $dst,$src,$shift\t! shift packed2S" %}
8610 ins_encode %{
8611 int opcode = this->as_Mach()->ideal_Opcode();
8612 if (UseAVX == 0) {
8613 if ($dst$$XMMRegister != $src$$XMMRegister)
8614 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8615 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8616 } else {
8617 int vector_len = 0;
8618 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8619 }
8620 %}
8621 ins_pipe( pipe_slow );
8622 %}
8623
// 4-short shift.  SSE path copies src (movdbl, 64 bits) into dst before
// the in-place shift; AVX path uses the three-operand form.
8624 instruct vshift4S(vecD dst, vecD src, vecS shift) %{
8625 predicate(n->as_Vector()->length() == 4);
8626 match(Set dst (LShiftVS src shift));
8627 match(Set dst (RShiftVS src shift));
8628 match(Set dst (URShiftVS src shift));
8629 format %{ "vshiftw $dst,$src,$shift\t! shift packed4S" %}
8630 ins_encode %{
8631 int opcode = this->as_Mach()->ideal_Opcode();
8632 if (UseAVX == 0) {
8633 if ($dst$$XMMRegister != $src$$XMMRegister)
8634 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8635 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8636
8637 } else {
8638 int vector_len = 0;
8639 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8640 }
8641 %}
8642 ins_pipe( pipe_slow );
8643 %}
8644
// 8-short shift.  SSE path copies the full 128 bits (movdqu) into dst
// before the in-place shift; AVX path uses the three-operand form.
8645 instruct vshift8S(vecX dst, vecX src, vecS shift) %{
8646 predicate(n->as_Vector()->length() == 8);
8647 match(Set dst (LShiftVS src shift));
8648 match(Set dst (RShiftVS src shift));
8649 match(Set dst (URShiftVS src shift));
8650 format %{ "vshiftw $dst,$src,$shift\t! shift packed8S" %}
8651 ins_encode %{
8652 int opcode = this->as_Mach()->ideal_Opcode();
8653 if (UseAVX == 0) {
8654 if ($dst$$XMMRegister != $src$$XMMRegister)
8655 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8656 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8657 } else {
8658 int vector_len = 0;
8659 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8660 }
8661 %}
8662 ins_pipe( pipe_slow );
8663 %}
8664
// 16-short shift, AVX2 (vector_len 1 = 256-bit).
8665 instruct vshift16S(vecY dst, vecY src, vecS shift) %{
8666 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8667 match(Set dst (LShiftVS src shift));
8668 match(Set dst (RShiftVS src shift));
8669 match(Set dst (URShiftVS src shift));
8670 format %{ "vshiftw $dst,$src,$shift\t! shift packed16S" %}
8671 ins_encode %{
8672 int vector_len = 1;
8673 int opcode = this->as_Mach()->ideal_Opcode();
8674 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8675 %}
8676 ins_pipe( pipe_slow );
8677 %}
8678
// 32-short shift, AVX-512; requires AVX512BW for 512-bit word shifts.
8679 instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{
8680 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8681 match(Set dst (LShiftVS src shift));
8682 match(Set dst (RShiftVS src shift));
8683 match(Set dst (URShiftVS src shift));
8684 format %{ "vshiftw $dst,$src,$shift\t! shift packed32S" %}
8685 ins_encode %{
8686 int vector_len = 2;
8687 int opcode = this->as_Mach()->ideal_Opcode();
8688 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8689 %}
8690 ins_pipe( pipe_slow );
8691 %}
8692
8693 // Integers vector left shift
// 2-int shift.  SSE path copies src (movdbl) into dst before the in-place
// shift; AVX path uses the three-operand form.
8694 instruct vshift2I(vecD dst, vecD src, vecS shift) %{
8695 predicate(n->as_Vector()->length() == 2);
8696 match(Set dst (LShiftVI src shift));
8697 match(Set dst (RShiftVI src shift));
8698 match(Set dst (URShiftVI src shift));
8699 format %{ "vshiftd $dst,$src,$shift\t! shift packed2I" %}
8700 ins_encode %{
8701 int opcode = this->as_Mach()->ideal_Opcode();
8702 if (UseAVX == 0) {
8703 if ($dst$$XMMRegister != $src$$XMMRegister)
8704 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8705 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8706 } else {
8707 int vector_len = 0;
8708 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8709 }
8710 %}
8711 ins_pipe( pipe_slow );
8712 %}
8713
// 4-int shift.  SSE path copies the full 128 bits into dst first; AVX path
// uses the three-operand form.
8714 instruct vshift4I(vecX dst, vecX src, vecS shift) %{
8715 predicate(n->as_Vector()->length() == 4);
8716 match(Set dst (LShiftVI src shift));
8717 match(Set dst (RShiftVI src shift));
8718 match(Set dst (URShiftVI src shift));
8719 format %{ "vshiftd $dst,$src,$shift\t! shift packed4I" %}
8720 ins_encode %{
8721 int opcode = this->as_Mach()->ideal_Opcode();
8722 if (UseAVX == 0) {
8723 if ($dst$$XMMRegister != $src$$XMMRegister)
8724 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8725 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8726 } else {
8727 int vector_len = 0;
8728 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8729 }
8730 %}
8731 ins_pipe( pipe_slow );
8732 %}
8733
// 8-int shift, AVX2 (vector_len 1 = 256-bit).
8734 instruct vshift8I(vecY dst, vecY src, vecS shift) %{
8735 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8736 match(Set dst (LShiftVI src shift));
8737 match(Set dst (RShiftVI src shift));
8738 match(Set dst (URShiftVI src shift));
8739 format %{ "vshiftd $dst,$src,$shift\t! shift packed8I" %}
8740 ins_encode %{
8741 int vector_len = 1;
8742 int opcode = this->as_Mach()->ideal_Opcode();
8743 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8744 %}
8745 ins_pipe( pipe_slow );
8746 %}
8747
// 16-int shift, AVX-512 (vector_len 2 = 512-bit).
8748 instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{
8749 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8750 match(Set dst (LShiftVI src shift));
8751 match(Set dst (RShiftVI src shift));
8752 match(Set dst (URShiftVI src shift));
8753 format %{ "vshiftd $dst,$src,$shift\t! shift packed16I" %}
8754 ins_encode %{
8755 int vector_len = 2;
8756 int opcode = this->as_Mach()->ideal_Opcode();
8757 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8758 %}
8759 ins_pipe( pipe_slow );
8760 %}
8761
8762 // Longs vector shift
// 2-long shift: left and logical right only.  Arithmetic right shift of
// longs is not matched here — it is handled by the vsra2L rules below
// (x86 has no packed 64-bit arithmetic right shift before AVX-512).
8763 instruct vshift2L(vecX dst, vecX src, vecS shift) %{
8764 predicate(n->as_Vector()->length() == 2);
8765 match(Set dst (LShiftVL src shift));
8766 match(Set dst (URShiftVL src shift));
8767 format %{ "vshiftq $dst,$src,$shift\t! shift packed2L" %}
8768 ins_encode %{
8769 int opcode = this->as_Mach()->ideal_Opcode();
8770 if (UseAVX == 0) {
8771 if ($dst$$XMMRegister != $src$$XMMRegister)
8772 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8773 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8774 } else {
8775 int vector_len = 0;
8776 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8777 }
8778 %}
8779 ins_pipe( pipe_slow );
8780 %}
8781
// 4-long shift, AVX2: left and logical right only (arithmetic right shift
// is handled by the vsra4L rules below).
// Fix: format said "left shift packed4L" although this rule also matches
// URShiftVL; siblings (vshift2L/vshift8L) print "shift packedNL".
8782 instruct vshift4L(vecY dst, vecY src, vecS shift) %{
8783 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8784 match(Set dst (LShiftVL src shift));
8785 match(Set dst (URShiftVL src shift));
8786 format %{ "vshiftq $dst,$src,$shift\t! shift packed4L" %}
8787 ins_encode %{
8788 int vector_len = 1;
8789 int opcode = this->as_Mach()->ideal_Opcode();
8790 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8791 %}
8792 ins_pipe( pipe_slow );
8793 %}
8794
// 8-long shift, AVX-512.  Unlike the 2L/4L rules this one also matches
// RShiftVL: EVEX provides a packed 64-bit arithmetic right shift.
8795 instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{
8796 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8797 match(Set dst (LShiftVL src shift));
8798 match(Set dst (RShiftVL src shift));
8799 match(Set dst (URShiftVL src shift));
8800 format %{ "vshiftq $dst,$src,$shift\t! shift packed8L" %}
8801 ins_encode %{
8802 int vector_len = 2;
8803 int opcode = this->as_Mach()->ideal_Opcode();
8804 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8805 %}
8806 ins_pipe( pipe_slow );
8807 %}
8808
8809 // -------------------ArithmeticRightShift -----------------------------------
8810 // Long vector arithmetic right shift
// 2-long arithmetic right shift without AVX-512: emulate via logical
// shift plus sign correction.  The 0x8000... sign-mask constant is shifted
// by the same amount, then xor/sub propagates the sign bits:
//   sra(x, s) == (srl(x, s) ^ m) - m   where m = srl(0x8000..., s)
8811 instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
8812 predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
8813 match(Set dst (RShiftVL src shift));
8814 effect(TEMP dst, TEMP tmp, TEMP scratch);
8815 format %{ "movdqu $dst,$src\n\t"
8816 "psrlq $dst,$shift\n\t"
8817 "movdqu $tmp,[0x8000000000000000]\n\t"
8818 "psrlq $tmp,$shift\n\t"
8819 "pxor $dst,$tmp\n\t"
8820 "psubq $dst,$tmp\t! arithmetic right shift packed2L" %}
8821 ins_encode %{
8822 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8823 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
8824 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
8825 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
8826 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
8827 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
8828 %}
8829 ins_pipe( pipe_slow );
8830 %}
8831
// 2-long arithmetic right shift on AVX-512: single evpsraq, preferred over
// the emulated vsra2L_reg when UseAVX > 2.
8832 instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
8833 predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
8834 match(Set dst (RShiftVL src shift));
8835 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
8836 ins_encode %{
8837 int vector_len = 0;
8838 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8839 %}
8840 ins_pipe( pipe_slow );
8841 %}
8842
// 4-long arithmetic right shift, AVX2: same srl/xor/sub sign-correction
// trick as vsra2L_reg, using three-operand AVX forms.
8843 instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
8844 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8845 match(Set dst (RShiftVL src shift));
8846 effect(TEMP dst, TEMP tmp, TEMP scratch);
8847 format %{ "vpsrlq $dst,$src,$shift\n\t"
8848 "vmovdqu $tmp,[0x8000000000000000]\n\t"
8849 "vpsrlq $tmp,$tmp,$shift\n\t"
8850 "vpxor $dst,$dst,$tmp\n\t"
8851 "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
8852 ins_encode %{
8853 int vector_len = 1;
8854 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8855 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
8856 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
8857 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
8858 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
8859 %}
8860 ins_pipe( pipe_slow );
8861 %}
8862
// 4-long arithmetic right shift on AVX-512: single evpsraq at 256-bit.
8863 instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{
8864 predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
8865 match(Set dst (RShiftVL src shift));
8866 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %}
8867 ins_encode %{
8868 int vector_len = 1;
8869 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8870 %}
8871 ins_pipe( pipe_slow );
8872 %}
8873
8874 // --------------------------------- AND --------------------------------------
8875
// AndV, 4-byte vectors: SSE in-place form, AVX three-operand form, and an
// AVX form with the load folded into the and.
8876 instruct vand4B(vecS dst, vecS src) %{
8877 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
8878 match(Set dst (AndV dst src));
8879 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
8880 ins_encode %{
8881 __ pand($dst$$XMMRegister, $src$$XMMRegister);
8882 %}
8883 ins_pipe( pipe_slow );
8884 %}
8885
8886 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
8887 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8888 match(Set dst (AndV src1 src2));
8889 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
8890 ins_encode %{
8891 int vector_len = 0;
8892 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8893 %}
8894 ins_pipe( pipe_slow );
8895 %}
8896
8897 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
8898 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8899 match(Set dst (AndV src (LoadVector mem)));
8900 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
8901 ins_encode %{
8902 int vector_len = 0;
8903 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8904 %}
8905 ins_pipe( pipe_slow );
8906 %}
8907
// AndV, 8-byte vectors: same SSE / AVX-register / AVX-memory trio as the
// 4-byte rules above.
8908 instruct vand8B(vecD dst, vecD src) %{
8909 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
8910 match(Set dst (AndV dst src));
8911 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
8912 ins_encode %{
8913 __ pand($dst$$XMMRegister, $src$$XMMRegister);
8914 %}
8915 ins_pipe( pipe_slow );
8916 %}
8917
8918 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
8919 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8920 match(Set dst (AndV src1 src2));
8921 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
8922 ins_encode %{
8923 int vector_len = 0;
8924 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8925 %}
8926 ins_pipe( pipe_slow );
8927 %}
8928
8929 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
8930 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8931 match(Set dst (AndV src (LoadVector mem)));
8932 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
8933 ins_encode %{
8934 int vector_len = 0;
8935 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8936 %}
8937 ins_pipe( pipe_slow );
8938 %}
8939
// AndV, 16-byte vectors, SSE in-place form (dst is also first source).
8940 instruct vand16B(vecX dst, vecX src) %{
8941 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
8942 match(Set dst (AndV dst src));
8943 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
8944 ins_encode %{
8945 __ pand($dst$$XMMRegister, $src$$XMMRegister);
8946 %}
8947 ins_pipe( pipe_slow );
8948 %}
8949
8950 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
8951 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8952 match(Set dst (AndV src1 src2));
8953 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
8954 ins_encode %{
8955 int vector_len = 0;
9276 %}
9277
// XorV, 64-byte (vecZ) vectors, AVX-512: register form and a form with the
// load folded into the xor.
9278 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
9279 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
9280 match(Set dst (XorV src1 src2));
9281 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
9282 ins_encode %{
9283 int vector_len = 2;
9284 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
9285 %}
9286 ins_pipe( pipe_slow );
9287 %}
9288
9289 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
9290 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
9291 match(Set dst (XorV src (LoadVector mem)));
9292 format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
9293 ins_encode %{
9294 int vector_len = 2;
9295 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
9296 %}
9297 ins_pipe( pipe_slow );
9298 %}
9299
9300 // --------------------------------- ABS --------------------------------------
9301 // a = |a|
9302 instruct vabs4B_reg(vecS dst, vecS src) %{
9303 predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
9304 match(Set dst (AbsVB src));
9305 format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed4B" %}
9306 ins_encode %{
9307 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
9308 %}
9309 ins_pipe( pipe_slow );
9310 %}
9311
9312 instruct vabs8B_reg(vecD dst, vecD src) %{
9313 predicate(UseSSE > 2 && n->as_Vector()->length() == 8);
9314 match(Set dst (AbsVB src));
9315 format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %}
9316 ins_encode %{
9317 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
9318 %}
9319 ins_pipe( pipe_slow );
9320 %}
9321
// Absolute value of 16 packed bytes (full 128-bit XMM): dst = |src| via pabsb.
9322 instruct vabs16B_reg(vecX dst, vecX src) %{
9323 predicate(UseSSE > 2 && n->as_Vector()->length() == 16);
9324 match(Set dst (AbsVB src));
9325 format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %}
9326 ins_encode %{
9327 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
9328 %}
9329 ins_pipe( pipe_slow );
9330 %}
9331
// Absolute value of 32 packed bytes: 256-bit vpabsb; UseAVX > 1 because
// 256-bit integer instructions require AVX2.
9332 instruct vabs32B_reg(vecY dst, vecY src) %{
9333 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
9334 match(Set dst (AbsVB src));
9335 format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %}
9336 ins_encode %{
9337 int vector_len = 1; // 1 => 256-bit vector encoding
9338 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
9339 %}
9340 ins_pipe( pipe_slow );
9341 %}
9342
// Absolute value of 64 packed bytes: 512-bit vpabsb (AVX-512/EVEX).
9343 instruct vabs64B_reg(vecZ dst, vecZ src) %{
9344 predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
9345 match(Set dst (AbsVB src));
9346 format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed64B" %}
9347 ins_encode %{
9348 int vector_len = 2; // 2 => 512-bit vector encoding
9349 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
9350 %}
9351 ins_pipe( pipe_slow );
9352 %}
9353
// Absolute value of 2 packed shorts: dst = |src| via pabsw (SSSE3; see note
// on vabs4B_reg about the UseSSE > 2 gate).
9354 instruct vabs2S_reg(vecD dst, vecD src) %{
9355 predicate(UseSSE > 2 && n->as_Vector()->length() == 2);
9356 match(Set dst (AbsVS src));
9357 format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed2S" %}
9358 ins_encode %{
9359 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
9360 %}
9361 ins_pipe( pipe_slow );
9362 %}
9363
// Absolute value of 4 packed shorts: dst = |src| via pabsw.
9364 instruct vabs4S_reg(vecD dst, vecD src) %{
9365 predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
9366 match(Set dst (AbsVS src));
9367 format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %}
9368 ins_encode %{
9369 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
9370 %}
9371 ins_pipe( pipe_slow );
9372 %}
9373
// Absolute value of 8 packed shorts (full 128-bit XMM): dst = |src| via pabsw.
9374 instruct vabs8S_reg(vecX dst, vecX src) %{
9375 predicate(UseSSE > 2 && n->as_Vector()->length() == 8);
9376 match(Set dst (AbsVS src));
9377 format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %}
9378 ins_encode %{
9379 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
9380 %}
9381 ins_pipe( pipe_slow );
9382 %}
9383
// Absolute value of 16 packed shorts: 256-bit vpabsw; UseAVX > 1 because
// 256-bit integer instructions require AVX2.
9384 instruct vabs16S_reg(vecY dst, vecY src) %{
9385 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
9386 match(Set dst (AbsVS src));
9387 format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %}
9388 ins_encode %{
9389 int vector_len = 1; // 1 => 256-bit vector encoding
9390 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
9391 %}
9392 ins_pipe( pipe_slow );
9393 %}
9394
// Absolute value of 32 packed shorts: 512-bit vpabsw (AVX-512/EVEX).
9395 instruct vabs32S_reg(vecZ dst, vecZ src) %{
9396 predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
9397 match(Set dst (AbsVS src));
9398 format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %}
9399 ins_encode %{
9400 int vector_len = 2; // 2 => 512-bit vector encoding
9401 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
9402 %}
9403 ins_pipe( pipe_slow );
9404 %}
9405
// Absolute value of 2 packed ints: dst = |src| via pabsd (SSSE3; see note
// on vabs4B_reg about the UseSSE > 2 gate).
9406 instruct vabs2I_reg(vecD dst, vecD src) %{
9407 predicate(UseSSE > 2 && n->as_Vector()->length() == 2);
9408 match(Set dst (AbsVI src));
9409 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %}
9410 ins_encode %{
9411 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
9412 %}
9413 ins_pipe( pipe_slow );
9414 %}
9415
// Absolute value of 4 packed ints (full 128-bit XMM): dst = |src| via pabsd.
9416 instruct vabs4I_reg(vecX dst, vecX src) %{
9417 predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
9418 match(Set dst (AbsVI src));
9419 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %}
9420 ins_encode %{
9421 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
9422 %}
9423 ins_pipe( pipe_slow );
9424 %}
9425
// Absolute value of 8 packed ints: 256-bit vpabsd.
// FIX: predicate previously allowed UseAVX > 0, but 256-bit integer vpabsd is
// an AVX2 instruction -- AVX1 only provides 256-bit floating-point operations.
// Gate on UseAVX > 1, consistent with the other 256-bit integer abs rules in
// this file (vabs32B_reg, vabs16S_reg).
9426 instruct vabs8I_reg(vecY dst, vecY src) %{
9427 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
9428 match(Set dst (AbsVI src));
9429 format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %}
9430 ins_encode %{
9431 int vector_len = 1; // 1 => 256-bit vector encoding
9432 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
9433 %}
9434 ins_pipe( pipe_slow );
9435 %}
9436
// Absolute value of 16 packed ints: 512-bit vpabsd (AVX-512/EVEX).
9437 instruct vabs16I_reg(vecZ dst, vecZ src) %{
9438 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
9439 match(Set dst (AbsVI src));
9440 format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed16I" %}
9441 ins_encode %{
9442 int vector_len = 2; // 2 => 512-bit vector encoding
9443 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
9444 %}
9445 ins_pipe( pipe_slow );
9446 %}
9447
// Absolute value of 2 packed longs. evpabsq exists only in AVX-512 (EVEX),
// so even this 128-bit form requires UseAVX > 2.
9448 instruct vabs2L_reg(vecX dst, vecX src) %{
9449 predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
9450 match(Set dst (AbsVL src));
9451 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %}
9452 ins_encode %{
9453 int vector_len = 0; // 0 => 128-bit vector encoding
9454 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
9455 %}
9456 ins_pipe( pipe_slow );
9457 %}
9458
// Absolute value of 4 packed longs: 256-bit evpabsq (AVX-512/EVEX only).
9459 instruct vabs4L_reg(vecY dst, vecY src) %{
9460 predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
9461 match(Set dst (AbsVL src));
9462 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %}
9463 ins_encode %{
9464 int vector_len = 1; // 1 => 256-bit vector encoding
9465 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
9466 %}
9467 ins_pipe( pipe_slow );
9468 %}
9469
// Absolute value of 8 packed longs: 512-bit evpabsq (AVX-512/EVEX).
9470 instruct vabs8L_reg(vecZ dst, vecZ src) %{
9471 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
9472 match(Set dst (AbsVL src));
9473 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %}
9474 ins_encode %{
9475 int vector_len = 2; // 2 => 512-bit vector encoding
9476 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
9477 %}
9478 ins_pipe( pipe_slow );
9479 %}
9480
9481 // --------------------------------- ABSNEG --------------------------------------
9482
// Abs or negate 2 packed doubles (SSE form). One rule serves both AbsVD and
// NegVD: the encoder dispatches on the node's ideal opcode. The scratch GPR is
// a TEMP used by the vabsnegd helper (presumably to address the sign-mask
// constant -- see the float_signmask()/signflip() stubs earlier in this file;
// TODO confirm).
9483 instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{
9484 predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
9485 match(Set dst (AbsVD src));
9486 match(Set dst (NegVD src));
9487 effect(TEMP scratch);
9488 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed2D" %}
9489 ins_encode %{
9490 int opcode = this->as_Mach()->ideal_Opcode(); // AbsVD vs NegVD selects mask op
9491 if ($dst$$XMMRegister != $src$$XMMRegister)
9492 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // SSE helper works in place, so copy src first
9493 __ vabsnegd(opcode, $dst$$XMMRegister, $scratch$$Register);
9494 %}
9495 ins_pipe( pipe_slow );
9496 %}
9497
// Abs or negate 4 packed doubles (256-bit AVX form); opcode selects AbsVD vs
// NegVD as in vabsneg2D. Three-operand AVX encoding: no explicit copy needed.
9498 instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{
9499 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
9500 match(Set dst (AbsVD src));
9501 match(Set dst (NegVD src));
9502 effect(TEMP scratch);
9503 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed4D" %}
9504 ins_encode %{
9505 int opcode = this->as_Mach()->ideal_Opcode();
9506 int vector_len = 1; // 1 => 256-bit vector encoding
9507 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
9508 %}
9509 ins_pipe( pipe_slow );
9510 %}
9511
// Abs or negate 8 packed doubles (512-bit AVX-512 form); opcode selects
// AbsVD vs NegVD as in vabsneg2D.
9512 instruct vabsneg8D(vecZ dst, vecZ src, rRegI scratch) %{
9513 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
9514 match(Set dst (AbsVD src));
9515 match(Set dst (NegVD src));
9516 effect(TEMP scratch);
9517 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed8D" %}
9518 ins_encode %{
9519 int opcode = this->as_Mach()->ideal_Opcode();
9520 int vector_len = 2; // 2 => 512-bit vector encoding
9521 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
9522 %}
9523 ins_pipe( pipe_slow );
9524 %}
9525
// Abs or negate 2 packed floats (SSE form); opcode selects AbsVF vs NegVF.
// The copy uses a full 128-bit movdqu even though only 64 bits (vecD) are
// meaningful -- harmless since the upper lanes are unused.
9526 instruct vabsneg2F(vecD dst, vecD src, rRegI scratch) %{
9527 predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
9528 match(Set dst (AbsVF src));
9529 match(Set dst (NegVF src));
9530 effect(TEMP scratch);
9531 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed2F" %}
9532 ins_encode %{
9533 int opcode = this->as_Mach()->ideal_Opcode();
9534 if ($dst$$XMMRegister != $src$$XMMRegister)
9535 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // SSE helper works in place, so copy src first
9536 __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
9537 %}
9538 ins_pipe( pipe_slow );
9539 %}
9540
// Abs or negate 4 packed floats (SSE form), strictly in place: the rule
// matches (AbsVF dst)/(NegVF dst), i.e. only when the matcher has already
// assigned the input and result to the same register, so no copy is emitted.
9541 instruct vabsneg4F(vecX dst, rRegI scratch) %{
9542 predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
9543 match(Set dst (AbsVF dst));
9544 match(Set dst (NegVF dst));
9545 effect(TEMP scratch);
9546 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
9547 ins_cost(150);
9548 ins_encode %{
9549 int opcode = this->as_Mach()->ideal_Opcode();
9550 __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
9551 %}
9552 ins_pipe( pipe_slow );
9553 %}
9554
// Abs or negate 8 packed floats (256-bit AVX form); opcode selects AbsVF vs
// NegVF. Three-operand AVX encoding: no explicit copy needed.
9555 instruct vabsneg8F(vecY dst, vecY src, rRegI scratch) %{
9556 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
9557 match(Set dst (AbsVF src));
9558 match(Set dst (NegVF src));
9559 effect(TEMP scratch);
9560 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed8F" %}
9561 ins_cost(150);
9562 ins_encode %{
9563 int opcode = this->as_Mach()->ideal_Opcode();
9564 int vector_len = 1; // 1 => 256-bit vector encoding
9565 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
9566 %}
9567 ins_pipe( pipe_slow );
9568 %}
9569
// Abs or negate 16 packed floats (512-bit AVX-512 form); opcode selects
// AbsVF vs NegVF.
9570 instruct vabsneg16F(vecZ dst, vecZ src, rRegI scratch) %{
9571 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
9572 match(Set dst (AbsVF src));
9573 match(Set dst (NegVF src));
9574 effect(TEMP scratch);
9575 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed16F" %}
9576 ins_cost(150);
9577 ins_encode %{
9578 int opcode = this->as_Mach()->ideal_Opcode();
9579 int vector_len = 2; // 2 => 512-bit vector encoding
9580 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
9581 %}
9582 ins_pipe( pipe_slow );
9583 %}
9584
9585 // --------------------------------- FMA --------------------------------------
9586
9587 // a * b + c
// Fused multiply-add on 2 packed doubles: c = a * b + c. Note the match
// pattern sets c itself -- the accumulator operand doubles as the result,
// mirroring the destructive dst of the hardware FMA, so no copy is needed
// (vfmad is called with c as both first source and destination).
9588 instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
9589 predicate(UseFMA && n->as_Vector()->length() == 2);
9590 match(Set c (FmaVD c (Binary a b)));
9591 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
9592 ins_cost(150);
9593 ins_encode %{
9594 int vector_len = 0; // 0 => 128-bit vector encoding
9595 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
9596 %}
9597 ins_pipe( pipe_slow );
9598 %}
9599
9600 // a * b + c
|