src/cpu/x86/vm/assembler_x86.cpp

  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/stubRoutines.hpp"
  39 #include "utilities/macros.hpp"
  40 #if INCLUDE_ALL_GCS
  41 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  42 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
  43 #include "gc_implementation/g1/heapRegion.hpp"
  44 #endif // INCLUDE_ALL_GCS
  45 
  46 #ifdef PRODUCT
  47 #define BLOCK_COMMENT(str) /* nothing */
  48 #define STOP(error) stop(error)
  49 #else
  50 #define BLOCK_COMMENT(str) block_comment(str)
  51 #define STOP(error) block_comment(error); stop(error)
  52 #endif
  53 
  54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  55 // Implementation of AddressLiteral
  56 
  57 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  58   _is_lval = false;
  59   _target = target;
  60   switch (rtype) {
  61   case relocInfo::oop_type:
  62   case relocInfo::metadata_type:
  63     // Oops are a special case. Normally they would be in their own section,
  64     // but in cases like icBuffer they are literals in the code stream for
  65     // which we don't have a section. We use none so that we get a literal
  66     // address, which is always patchable.
  67     break;
  68   case relocInfo::external_word_type:
  69     _rspec = external_word_Relocation::spec(target);
  70     break;
  71   case relocInfo::internal_word_type:
  72     _rspec = internal_word_Relocation::spec(target);
  73     break;
  74   case relocInfo::opt_virtual_call_type:
  75     _rspec = opt_virtual_call_Relocation::spec();
  76     break;


 256   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 257   if (is8bit(imm32)) {
  258     emit_int8(op1 | 0x02); // set sign-extension bit
 259     emit_operand(rm, adr, 1);
 260     emit_int8(imm32 & 0xFF);
 261   } else {
 262     emit_int8(op1);
 263     emit_operand(rm, adr, 4);
 264     emit_int32(imm32);
 265   }
 266 }
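// Worked example (annotation, not part of the original source): the 8-bit path
// above turns the 0x81 group opcode into 0x83, the sign-extended-imm8 form. For
// instance, subl(Address(rbx, 0), 8) reaches here via
// emit_arith_operand(0x81, rbp, dst, 8) and emits
//   0x83 0x2B 0x08            i.e. "sub dword ptr [rbx], 8"
// where ModRM 0x2B = [00 101 011]: reg = 5 is the /5 SUB opcode extension
// carried by rbp, and rm = 3 is the rbx base.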
 267 
 268 
 269 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 270   assert(isByte(op1) && isByte(op2), "wrong opcode");
 271   emit_int8(op1);
 272   emit_int8(op2 | encode(dst) << 3 | encode(src));
 273 }
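// Worked example (annotation, not part of the original source): andl(rcx, rbx)
// calls emit_arith(0x23, 0xC0, rcx, rbx) above and emits
//   0x23 0xCB                 i.e. "and ecx, ebx"
// since op2 | encode(dst) << 3 | encode(src) = 0xC0 | (1 << 3) | 3 = 0xCB.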
 274 
 275 
 276 void Assembler::emit_operand(Register reg, Register base, Register index,
 277                              Address::ScaleFactor scale, int disp,
 278                              RelocationHolder const& rspec,
 279                              int rip_relative_correction) {
 280   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
 281 
 282   // Encode the registers as needed in the fields they are used in
 283 
 284   int regenc = encode(reg) << 3;
 285   int indexenc = index->is_valid() ? encode(index) << 3 : 0;
 286   int baseenc = base->is_valid() ? encode(base) : 0;
 287 
 288   if (base->is_valid()) {
 289     if (index->is_valid()) {
 290       assert(scale != Address::no_scale, "inconsistent address");
 291       // [base + index*scale + disp]
 292       if (disp == 0 && rtype == relocInfo::none  &&
 293           base != rbp LP64_ONLY(&& base != r13)) {
 294         // [base + index*scale]
 295         // [00 reg 100][ss index base]
 296         assert(index != rsp, "illegal addressing mode");
 297         emit_int8(0x04 | regenc);
 298         emit_int8(scale << 6 | indexenc | baseenc);
 299       } else if (is8bit(disp) && rtype == relocInfo::none) {
 300         // [base + index*scale + imm8]
 301         // [01 reg 100][ss index base] imm8
 302         assert(index != rsp, "illegal addressing mode");
 303         emit_int8(0x44 | regenc);
 304         emit_int8(scale << 6 | indexenc | baseenc);
 305         emit_int8(disp & 0xFF);
 306       } else {
 307         // [base + index*scale + disp32]
 308         // [10 reg 100][ss index base] disp32
 309         assert(index != rsp, "illegal addressing mode");
 310         emit_int8(0x84 | regenc);
 311         emit_int8(scale << 6 | indexenc | baseenc);
 312         emit_data(disp, rspec, disp32_operand);
 313       }
 314     } else if (base == rsp LP64_ONLY(|| base == r12)) {
 315       // [rsp + disp]
 316       if (disp == 0 && rtype == relocInfo::none) {
 317         // [rsp]
 318         // [00 reg 100][00 100 100]
 319         emit_int8(0x04 | regenc);
 320         emit_int8(0x24);
 321       } else if (is8bit(disp) && rtype == relocInfo::none) {
 322         // [rsp + imm8]
 323         // [01 reg 100][00 100 100] disp8
 324         emit_int8(0x44 | regenc);
 325         emit_int8(0x24);
 326         emit_int8(disp & 0xFF);
 327       } else {
 328         // [rsp + imm32]
 329         // [10 reg 100][00 100 100] disp32
 330         emit_int8(0x84 | regenc);
 331         emit_int8(0x24);
 332         emit_data(disp, rspec, disp32_operand);
 333       }
 334     } else {
 335       // [base + disp]
 336       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
 337       if (disp == 0 && rtype == relocInfo::none &&
 338           base != rbp LP64_ONLY(&& base != r13)) {
 339         // [base]
 340         // [00 reg base]
 341         emit_int8(0x00 | regenc | baseenc);
 342       } else if (is8bit(disp) && rtype == relocInfo::none) {
 343         // [base + disp8]
 344         // [01 reg base] disp8
 345         emit_int8(0x40 | regenc | baseenc);
 346         emit_int8(disp & 0xFF);
 347       } else {
 348         // [base + disp32]
 349         // [10 reg base] disp32
 350         emit_int8(0x80 | regenc | baseenc);
 351         emit_data(disp, rspec, disp32_operand);
 352       }
 353     }
 354   } else {
 355     if (index->is_valid()) {
 356       assert(scale != Address::no_scale, "inconsistent address");
 357       // [index*scale + disp]
 358       // [00 reg 100][ss index 101] disp32
 359       assert(index != rsp, "illegal addressing mode");
 360       emit_int8(0x04 | regenc);
 361       emit_int8(scale << 6 | indexenc | 0x05);
 362       emit_data(disp, rspec, disp32_operand);


 372       // at the start of the instruction. That needs more correction here.
 373       // intptr_t disp = target - next_ip;
 374       assert(inst_mark() != NULL, "must be inside InstructionMark");
 375       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
 376       int64_t adjusted = disp;
 377       // Do rip-rel adjustment for 64bit
 378       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
 379       assert(is_simm32(adjusted),
 380              "must be 32bit offset (RIP relative address)");
 381       emit_data((int32_t) adjusted, rspec, disp32_operand);
 382 
 383     } else {
  384       // 32-bit never did this; it did everything via the rip-rel/disp code above
 385       // [disp] ABSOLUTE
 386       // [00 reg 100][00 100 101] disp32
 387       emit_int8(0x04 | regenc);
 388       emit_int8(0x25);
 389       emit_data(disp, rspec, disp32_operand);
 390     }
 391   }

 392 }
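// Worked example (annotation, not part of the original source): for reg = rdx,
// base = rbx, index = rcx, scale = Address::times_4, disp = 8 and no relocation,
// the [base + index*scale + imm8] path above emits the operand bytes
//   0x54 0x8B 0x08
// ModRM 0x54 = [01 010 100] (mod = disp8, reg = rdx, rm = SIB), SIB 0x8B =
// [10 001 011] (scale = 4, index = rcx, base = rbx), followed by the 8-bit
// displacement. Prefixed by opcode 0x8B this reads "mov edx, [rbx + rcx*4 + 8]".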
 393 
 394 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 395                              Address::ScaleFactor scale, int disp,
 396                              RelocationHolder const& rspec) {








 397   emit_operand((Register)reg, base, index, scale, disp, rspec);
 398 }
 399 
 400 // Secret local extension to Assembler::WhichOperand:
 401 #define end_pc_operand (_WhichOperand_limit)
 402 
 403 address Assembler::locate_operand(address inst, WhichOperand which) {
 404   // Decode the given instruction, and return the address of
 405   // an embedded 32-bit operand word.
 406 
 407   // If "which" is disp32_operand, selects the displacement portion
 408   // of an effective address specifier.
 409   // If "which" is imm64_operand, selects the trailing immediate constant.
 410   // If "which" is call32_operand, selects the displacement of a call or jump.
 411   // Caller is responsible for ensuring that there is such an operand,
 412   // and that it is 32/64 bits wide.
 413 
 414   // If "which" is end_pc_operand, find the end of the instruction.
 415 
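  // Illustrative note (annotation, not part of the original source): for a
  // 5-byte direct call "E8 <rel32>", locate_operand(inst, call32_operand)
  // returns inst + 1 (the start of the rel32), and
  // locate_operand(inst, end_pc_operand) returns inst + 5.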
 416   address ip = inst;


 669     // First byte
 670     if ((0xFF & *inst) == VEX_3bytes) {
 671       ip++; // third byte
 672       is_64bit = ((VEX_W & *ip) == VEX_W);
 673     }
 674     ip++; // opcode
 675     // To find the end of instruction (which == end_pc_operand).
 676     switch (0xFF & *ip) {
 677     case 0x61: // pcmpestri r, r/a, #8
 678     case 0x70: // pshufd r, r/a, #8
 679     case 0x73: // psrldq r, #8
 680       tail_size = 1;  // the imm8
 681       break;
 682     default:
 683       break;
 684     }
 685     ip++; // skip opcode
 686     debug_only(has_disp32 = true); // has both kinds of operands!
 687     break;
 688 
 689   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 690   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 691   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 692   case 0xDD: // fld_d a; fst_d a; fstp_d a
 693   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 694   case 0xDF: // fild_d a; fistp_d a
 695   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 696   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 697   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 698     debug_only(has_disp32 = true);
 699     break;
 700 
 701   case 0xE8: // call rdisp32
 702   case 0xE9: // jmp  rdisp32
 703     if (which == end_pc_operand)  return ip + 4;
 704     assert(which == call32_operand, "call has no disp32 or imm");
 705     return ip;
 706 
 707   case 0xF0:                    // Lock
 708     assert(os::is_MP(), "only on MP");


 968   emit_int8(0x0F);
 969   emit_int8(0x1F);
 970   emit_int8((unsigned char)0x80);
 971                    // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
 972   emit_int32(0);   // 32-bits offset (4 bytes)
 973 }
 974 
 975 void Assembler::addr_nop_8() {
 976   assert(UseAddressNop, "no CPU support");
 977   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
 978   emit_int8(0x0F);
 979   emit_int8(0x1F);
 980   emit_int8((unsigned char)0x84);
 981                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
 982   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
 983   emit_int32(0);   // 32-bits offset (4 bytes)
 984 }
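// Annotation (not part of the original source): the eight bytes emitted above are
//   0F 1F 84 00 00 00 00 00
// i.e. the long-NOP form "nop dword ptr [eax+eax*1+0]" used for code alignment.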
 985 
 986 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
 987   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



 988   emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);

 989 }
 990 
 991 void Assembler::addsd(XMMRegister dst, Address src) {
 992   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





 993   emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);

 994 }
 995 
 996 void Assembler::addss(XMMRegister dst, XMMRegister src) {
 997   NOT_LP64(assert(VM_Version::supports_sse(), ""));
 998   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 999 }
1000 
1001 void Assembler::addss(XMMRegister dst, Address src) {
1002   NOT_LP64(assert(VM_Version::supports_sse(), ""));




1003   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
1004 }
1005 
1006 void Assembler::aesdec(XMMRegister dst, Address src) {
1007   assert(VM_Version::supports_aes(), "");
1008   InstructionMark im(this);
1009   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1010   emit_int8((unsigned char)0xDE);
1011   emit_operand(dst, src);
1012 }
1013 
1014 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1015   assert(VM_Version::supports_aes(), "");
1016   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1017   emit_int8((unsigned char)0xDE);
1018   emit_int8(0xC0 | encode);
1019 }
1020 
1021 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1022   assert(VM_Version::supports_aes(), "");
1023   InstructionMark im(this);
1024   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1025   emit_int8((unsigned char)0xDF);
1026   emit_operand(dst, src);
1027 }
1028 
1029 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1030   assert(VM_Version::supports_aes(), "");
1031   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1032   emit_int8((unsigned char)0xDF);
1033   emit_int8((unsigned char)(0xC0 | encode));
1034 }
1035 
1036 void Assembler::aesenc(XMMRegister dst, Address src) {
1037   assert(VM_Version::supports_aes(), "");
1038   InstructionMark im(this);
1039   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1040   emit_int8((unsigned char)0xDC);
1041   emit_operand(dst, src);
1042 }
1043 
1044 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1045   assert(VM_Version::supports_aes(), "");
1046   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1047   emit_int8((unsigned char)0xDC);
1048   emit_int8(0xC0 | encode);
1049 }
1050 
1051 void Assembler::aesenclast(XMMRegister dst, Address src) {
1052   assert(VM_Version::supports_aes(), "");
1053   InstructionMark im(this);
1054   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1055   emit_int8((unsigned char)0xDD);
1056   emit_operand(dst, src);
1057 }
1058 
1059 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1060   assert(VM_Version::supports_aes(), "");
1061   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1062   emit_int8((unsigned char)0xDD);
1063   emit_int8((unsigned char)(0xC0 | encode));
1064 }
1065 
1066 
1067 void Assembler::andl(Address dst, int32_t imm32) {
1068   InstructionMark im(this);
1069   prefix(dst);
1070   emit_int8((unsigned char)0x81);
1071   emit_operand(rsp, dst, 4);
1072   emit_int32(imm32);
1073 }
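// Annotation (not part of the original source): rsp is not referenced here; its
// encoding (4) merely supplies the /4 opcode extension of the 0x81 group, which
// selects AND. For example andl(Address(rbx, 0), 1) emits
//   0x81 0x23 0x01 0x00 0x00 0x00      i.e. "and dword ptr [rbx], 1"
// with ModRM 0x23 = [00 100 011] (reg = /4, base = rbx).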
1074 
1075 void Assembler::andl(Register dst, int32_t imm32) {
1076   prefix(dst);
1077   emit_arith(0x81, 0xE0, dst, imm32);
1078 }
1079 
1080 void Assembler::andl(Register dst, Address src) {
1081   InstructionMark im(this);
1082   prefix(src, dst);
1083   emit_int8(0x23);
1084   emit_operand(dst, src);
1085 }
1086 
1087 void Assembler::andl(Register dst, Register src) {
1088   (void) prefix_and_encode(dst->encoding(), src->encoding());
1089   emit_arith(0x23, 0xC0, dst, src);
1090 }
1091 
1092 void Assembler::andnl(Register dst, Register src1, Register src2) {
1093   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1094   int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
1095   emit_int8((unsigned char)0xF2);
1096   emit_int8((unsigned char)(0xC0 | encode));
1097 }
1098 
1099 void Assembler::andnl(Register dst, Register src1, Address src2) {
1100   InstructionMark im(this);
1101   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1102   vex_prefix_0F38(dst, src1, src2);
1103   emit_int8((unsigned char)0xF2);
1104   emit_operand(dst, src2);
1105 }
1106 
1107 void Assembler::bsfl(Register dst, Register src) {
1108   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1109   emit_int8(0x0F);
1110   emit_int8((unsigned char)0xBC);
1111   emit_int8((unsigned char)(0xC0 | encode));
1112 }
1113 
1114 void Assembler::bsrl(Register dst, Register src) {
1115   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1116   emit_int8(0x0F);
1117   emit_int8((unsigned char)0xBD);
1118   emit_int8((unsigned char)(0xC0 | encode));
1119 }
1120 
1121 void Assembler::bswapl(Register reg) { // bswap
1122   int encode = prefix_and_encode(reg->encoding());
1123   emit_int8(0x0F);
1124   emit_int8((unsigned char)(0xC8 | encode));
1125 }
1126 
1127 void Assembler::blsil(Register dst, Register src) {
1128   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1129   int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
1130   emit_int8((unsigned char)0xF3);
1131   emit_int8((unsigned char)(0xC0 | encode));
1132 }
1133 
1134 void Assembler::blsil(Register dst, Address src) {
1135   InstructionMark im(this);
1136   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1137   vex_prefix_0F38(rbx, dst, src);
1138   emit_int8((unsigned char)0xF3);
1139   emit_operand(rbx, src);
1140 }
1141 
1142 void Assembler::blsmskl(Register dst, Register src) {
1143   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1144   int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
1145   emit_int8((unsigned char)0xF3);
1146   emit_int8((unsigned char)(0xC0 | encode));
1147 }
1148 
1149 void Assembler::blsmskl(Register dst, Address src) {
1150   InstructionMark im(this);
1151   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1152   vex_prefix_0F38(rdx, dst, src);
1153   emit_int8((unsigned char)0xF3);
1154   emit_operand(rdx, src);
1155 }
1156 
1157 void Assembler::blsrl(Register dst, Register src) {
1158   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1159   int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
1160   emit_int8((unsigned char)0xF3);
1161   emit_int8((unsigned char)(0xC0 | encode));
1162 }
1163 
1164 void Assembler::blsrl(Register dst, Address src) {
1165   InstructionMark im(this);
1166   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1167   vex_prefix_0F38(rcx, dst, src);
1168   emit_int8((unsigned char)0xF3);
1169   emit_operand(rcx, src);
1170 }
1171 
1172 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1173   // suspect disp32 is always good
1174   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1175 
1176   if (L.is_bound()) {
1177     const int long_size = 5;
1178     int offs = (int)( target(L) - pc() );
1179     assert(offs <= 0, "assembler error");
1180     InstructionMark im(this);
1181     // 1110 1000 #32-bit disp
1182     emit_int8((unsigned char)0xE8);
1183     emit_data(offs - long_size, rtype, operand);
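      // Annotation (not part of the original source): offs is measured from the
      // start of this call instruction, so the stored rel32 is offs - long_size,
      // i.e. target(L) minus the address of the byte following the 5-byte call,
      // which is how the CPU interprets the displacement.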
1184   } else {
1185     InstructionMark im(this);
1186     // 1110 1000 #32-bit disp
1187     L.add_patch_at(code(), locator());


1295   emit_int8(0x0F);
1296   emit_int8((unsigned char)0xB1);
1297   emit_operand(reg, adr);
1298 }
1299 
1300 // The 8-bit cmpxchg compares the value at adr with the contents of rax,
 1301 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax.
1302 // The ZF is set if the compared values were equal, and cleared otherwise.
1303 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1304   InstructionMark im(this);
1305   prefix(adr, reg, true);
1306   emit_int8(0x0F);
1307   emit_int8((unsigned char)0xB0);
1308   emit_operand(reg, adr);
1309 }
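// Illustrative use, a sketch only (annotation, not part of the original source):
// with the expected byte in al and the new value in dl, an atomic byte CAS would
// typically be emitted as
//   lock();                 // 0xF0 prefix, for atomicity on MP systems
//   cmpxchgb(rdx, field);   // if (*field == al) { *field = dl; ZF = 1; }
//                           // else              { al = *field; ZF = 0; }
// where "field" stands for some Address; the call site shown is hypothetical.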
1310 
1311 void Assembler::comisd(XMMRegister dst, Address src) {
1312   // NOTE: dbx seems to decode this as comiss even though the
 1313   // 0x66 is there. Strangely ucomisd comes out correct
1314   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





1315   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);

1316 }
1317 
1318 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1319   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1320   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);

1321 }
1322 
1323 void Assembler::comiss(XMMRegister dst, Address src) {




1324   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1325   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
1326 }
1327 
1328 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1329   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1330   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
1331 }
1332 
1333 void Assembler::cpuid() {
1334   emit_int8(0x0F);
1335   emit_int8((unsigned char)0xA2);
1336 }
1337 
1338 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1339   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1340   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
1341 }
1342 
1343 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1344   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1345   emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
1346 }
1347 
1348 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1349   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1350   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);

1351 }
1352 
1353 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1354   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





1355   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);

1356 }
1357 
1358 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1359   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1360   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);





1361   emit_int8(0x2A);
1362   emit_int8((unsigned char)(0xC0 | encode));
1363 }
1364 
1365 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1366   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





1367   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);

1368 }
1369 
1370 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1371   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1372   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1373   emit_int8(0x2A);
1374   emit_int8((unsigned char)(0xC0 | encode));
1375 }
1376 
1377 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {




1378   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1379   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
1380 }
1381 
1382 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1383   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1384   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1385 }
1386 
1387 void Assembler::cvtss2sd(XMMRegister dst, Address src) {




1388   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1389   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1390 }
1391 
1392 
1393 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1394   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1395   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
1396   emit_int8(0x2C);
1397   emit_int8((unsigned char)(0xC0 | encode));
1398 }
1399 
1400 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1401   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1402   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
1403   emit_int8(0x2C);
1404   emit_int8((unsigned char)(0xC0 | encode));
1405 }
1406 
1407 void Assembler::decl(Address dst) {
1408   // Don't use it directly. Use MacroAssembler::decrement() instead.
1409   InstructionMark im(this);
1410   prefix(dst);
1411   emit_int8((unsigned char)0xFF);
1412   emit_operand(rcx, dst);
1413 }
1414 
1415 void Assembler::divsd(XMMRegister dst, Address src) {
1416   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





1417   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);

1418 }
1419 
1420 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1421   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1422   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);

1423 }
1424 
1425 void Assembler::divss(XMMRegister dst, Address src) {




1426   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1427   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1428 }
1429 
1430 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1431   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1432   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1433 }
1434 
1435 void Assembler::emms() {
1436   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1437   emit_int8(0x0F);
1438   emit_int8(0x77);
1439 }
1440 
1441 void Assembler::hlt() {
1442   emit_int8((unsigned char)0xF4);
1443 }
1444 
1445 void Assembler::idivl(Register src) {


1658   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1659   emit_int8(0x0F);
1660   emit_int8((unsigned char)0xBD);
1661   emit_int8((unsigned char)(0xC0 | encode));
1662 }
1663 
1664 // Emit mfence instruction
1665 void Assembler::mfence() {
1666   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1667   emit_int8(0x0F);
1668   emit_int8((unsigned char)0xAE);
1669   emit_int8((unsigned char)0xF0);
1670 }
1671 
1672 void Assembler::mov(Register dst, Register src) {
1673   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1674 }
1675 
1676 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1677   NOT_LP64(assert(VM_Version::supports_sse2(), ""));




1678   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);

1679 }
1680 
1681 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1682   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1683   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
1684 }
1685 
1686 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
1687   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1688   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);

1689   emit_int8(0x16);
1690   emit_int8((unsigned char)(0xC0 | encode));
1691 }
1692 
1693 void Assembler::movb(Register dst, Address src) {
1694   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1695   InstructionMark im(this);
1696   prefix(src, dst, true);
1697   emit_int8((unsigned char)0x8A);
1698   emit_operand(dst, src);
1699 }
1700 
1701 
1702 void Assembler::movb(Address dst, int imm8) {
1703   InstructionMark im(this);
 1704   prefix(dst);
1705   emit_int8((unsigned char)0xC6);
1706   emit_operand(rax, dst, 1);
1707   emit_int8(imm8);
1708 }
1709 
1710 
1711 void Assembler::movb(Address dst, Register src) {
1712   assert(src->has_byte_register(), "must have byte register");
1713   InstructionMark im(this);
1714   prefix(dst, src, true);
1715   emit_int8((unsigned char)0x88);
1716   emit_operand(src, dst);
1717 }
1718 
1719 void Assembler::movdl(XMMRegister dst, Register src) {
1720   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1721   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1722   emit_int8(0x6E);
1723   emit_int8((unsigned char)(0xC0 | encode));
1724 }
1725 
1726 void Assembler::movdl(Register dst, XMMRegister src) {
1727   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1728   // swap src/dst to get correct prefix
1729   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
1730   emit_int8(0x7E);
1731   emit_int8((unsigned char)(0xC0 | encode));
1732 }
1733 
1734 void Assembler::movdl(XMMRegister dst, Address src) {
1735   NOT_LP64(assert(VM_Version::supports_sse2(), ""));




1736   InstructionMark im(this);
1737   simd_prefix(dst, src, VEX_SIMD_66);
1738   emit_int8(0x6E);
1739   emit_operand(dst, src);
1740 }
1741 
1742 void Assembler::movdl(Address dst, XMMRegister src) {
1743   NOT_LP64(assert(VM_Version::supports_sse2(), ""));




1744   InstructionMark im(this);
1745   simd_prefix(dst, src, VEX_SIMD_66);
1746   emit_int8(0x7E);
1747   emit_operand(src, dst);
1748 }
1749 
1750 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1751   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1752   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1753 }
1754 
1755 void Assembler::movdqa(XMMRegister dst, Address src) {
1756   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1757   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1758 }
1759 
1760 void Assembler::movdqu(XMMRegister dst, Address src) {
1761   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1762   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1763 }
1764 
1765 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1766   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1767   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1768 }
1769 
1770 void Assembler::movdqu(Address dst, XMMRegister src) {
1771   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1772   InstructionMark im(this);
1773   simd_prefix(dst, src, VEX_SIMD_F3);
1774   emit_int8(0x7F);
1775   emit_operand(src, dst);
1776 }
1777 
1778 // Move Unaligned 256bit Vector
1779 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
1780   assert(UseAVX > 0, "");
1781   bool vector256 = true;
1782   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);



1783   emit_int8(0x6F);
1784   emit_int8((unsigned char)(0xC0 | encode));
1785 }
1786 
1787 void Assembler::vmovdqu(XMMRegister dst, Address src) {
1788   assert(UseAVX > 0, "");



1789   InstructionMark im(this);
1790   bool vector256 = true;
1791   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1792   emit_int8(0x6F);
1793   emit_operand(dst, src);
1794 }
1795 
1796 void Assembler::vmovdqu(Address dst, XMMRegister src) {
1797   assert(UseAVX > 0, "");
1798   InstructionMark im(this);
1799   bool vector256 = true;
1800   // swap src<->dst for encoding
1801   assert(src != xnoreg, "sanity");
1802   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
1803   emit_int8(0x7F);
1804   emit_operand(src, dst);
1805 }
1806 
1807 // Uses zero extension on 64bit
1808 
1809 void Assembler::movl(Register dst, int32_t imm32) {
1810   int encode = prefix_and_encode(dst->encoding());
1811   emit_int8((unsigned char)(0xB8 | encode));
1812   emit_int32(imm32);
1813 }
1814 
1815 void Assembler::movl(Register dst, Register src) {
1816   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1817   emit_int8((unsigned char)0x8B);
1818   emit_int8((unsigned char)(0xC0 | encode));
1819 }
1820 
1821 void Assembler::movl(Register dst, Address src) {
1822   InstructionMark im(this);


1828 void Assembler::movl(Address dst, int32_t imm32) {
1829   InstructionMark im(this);
1830   prefix(dst);
1831   emit_int8((unsigned char)0xC7);
1832   emit_operand(rax, dst, 4);
1833   emit_int32(imm32);
1834 }
1835 
1836 void Assembler::movl(Address dst, Register src) {
1837   InstructionMark im(this);
1838   prefix(dst, src);
1839   emit_int8((unsigned char)0x89);
1840   emit_operand(src, dst);
1841 }
1842 
 1843 // Newer CPUs require the use of movsd and movss to avoid a partial register
 1844 // stall when loading from memory. But for old Opteron, use movlpd instead of
 1845 // movsd. The selection is done in MacroAssembler::movdbl() and movflt().
1846 void Assembler::movlpd(XMMRegister dst, Address src) {
1847   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1848   emit_simd_arith(0x12, dst, src, VEX_SIMD_66);




1849 }
1850 
1851 void Assembler::movq( MMXRegister dst, Address src ) {
1852   assert( VM_Version::supports_mmx(), "" );
1853   emit_int8(0x0F);
1854   emit_int8(0x6F);
1855   emit_operand(dst, src);
1856 }
1857 
1858 void Assembler::movq( Address dst, MMXRegister src ) {
1859   assert( VM_Version::supports_mmx(), "" );
1860   emit_int8(0x0F);
1861   emit_int8(0x7F);
 1862   // workaround for a gcc (3.2.1-7a) bug
 1863   // In that version of gcc, with only an emit_operand(MMX, Address) overload,
 1864   // gcc will tail jump and try to reverse the parameters, completely
 1865   // obliterating dst in the process. By having a version available
 1866   // that doesn't need to swap the args at the tail jump, the bug is
 1867   // avoided.
1868   emit_operand(dst, src);
1869 }
1870 
1871 void Assembler::movq(XMMRegister dst, Address src) {
1872   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1873   InstructionMark im(this);
1874   simd_prefix(dst, src, VEX_SIMD_F3);






1875   emit_int8(0x7E);
1876   emit_operand(dst, src);
1877 }
1878 
1879 void Assembler::movq(Address dst, XMMRegister src) {
1880   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1881   InstructionMark im(this);
1882   simd_prefix(dst, src, VEX_SIMD_66);






1883   emit_int8((unsigned char)0xD6);
1884   emit_operand(src, dst);
1885 }
1886 
1887 void Assembler::movsbl(Register dst, Address src) { // movsxb
1888   InstructionMark im(this);
1889   prefix(src, dst);
1890   emit_int8(0x0F);
1891   emit_int8((unsigned char)0xBE);
1892   emit_operand(dst, src);
1893 }
1894 
1895 void Assembler::movsbl(Register dst, Register src) { // movsxb
1896   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1897   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1898   emit_int8(0x0F);
1899   emit_int8((unsigned char)0xBE);
1900   emit_int8((unsigned char)(0xC0 | encode));
1901 }
1902 
1903 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1904   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1905   emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);

1906 }
1907 
1908 void Assembler::movsd(XMMRegister dst, Address src) {
1909   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





1910   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);

1911 }
1912 
1913 void Assembler::movsd(Address dst, XMMRegister src) {
1914   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1915   InstructionMark im(this);
1916   simd_prefix(dst, src, VEX_SIMD_F2);






1917   emit_int8(0x11);
1918   emit_operand(src, dst);
1919 }
1920 
1921 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1922   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1923   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
1924 }
1925 
1926 void Assembler::movss(XMMRegister dst, Address src) {
1927   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1928   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);




1929 }
1930 
1931 void Assembler::movss(Address dst, XMMRegister src) {
1932   NOT_LP64(assert(VM_Version::supports_sse(), ""));




1933   InstructionMark im(this);
1934   simd_prefix(dst, src, VEX_SIMD_F3);
1935   emit_int8(0x11);
1936   emit_operand(src, dst);
1937 }
1938 
1939 void Assembler::movswl(Register dst, Address src) { // movsxw
1940   InstructionMark im(this);
1941   prefix(src, dst);
1942   emit_int8(0x0F);
1943   emit_int8((unsigned char)0xBF);
1944   emit_operand(dst, src);
1945 }
1946 
1947 void Assembler::movswl(Register dst, Register src) { // movsxw
1948   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1949   emit_int8(0x0F);
1950   emit_int8((unsigned char)0xBF);
1951   emit_int8((unsigned char)(0xC0 | encode));
1952 }
1953 
1954 void Assembler::movw(Address dst, int imm16) {


2006   emit_int8(0x0F);
2007   emit_int8((unsigned char)0xB7);
2008   emit_int8(0xC0 | encode);
2009 }
2010 
2011 void Assembler::mull(Address src) {
2012   InstructionMark im(this);
2013   prefix(src);
2014   emit_int8((unsigned char)0xF7);
2015   emit_operand(rsp, src);
2016 }
2017 
2018 void Assembler::mull(Register src) {
2019   int encode = prefix_and_encode(src->encoding());
2020   emit_int8((unsigned char)0xF7);
2021   emit_int8((unsigned char)(0xE0 | encode));
2022 }
2023 
2024 void Assembler::mulsd(XMMRegister dst, Address src) {
2025   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





2026   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);

2027 }
2028 
2029 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2030   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



2031   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);

2032 }
2033 
2034 void Assembler::mulss(XMMRegister dst, Address src) {
2035   NOT_LP64(assert(VM_Version::supports_sse(), ""));




2036   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2037 }
2038 
2039 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2040   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2041   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2042 }
2043 
2044 void Assembler::negl(Register dst) {
2045   int encode = prefix_and_encode(dst->encoding());
2046   emit_int8((unsigned char)0xF7);
2047   emit_int8((unsigned char)(0xD8 | encode));
2048 }
2049 
2050 void Assembler::nop(int i) {
2051 #ifdef ASSERT
2052   assert(i > 0, " ");
 2053   // The fancy nops aren't currently recognized by debuggers, making it a
 2054   // pain to disassemble code while debugging. If asserts are on, clearly
 2055   // speed is not an issue, so simply use the single-byte traditional nop


2315 void Assembler::orl(Register dst, int32_t imm32) {
2316   prefix(dst);
2317   emit_arith(0x81, 0xC8, dst, imm32);
2318 }
2319 
2320 void Assembler::orl(Register dst, Address src) {
2321   InstructionMark im(this);
2322   prefix(src, dst);
2323   emit_int8(0x0B);
2324   emit_operand(dst, src);
2325 }
2326 
2327 void Assembler::orl(Register dst, Register src) {
2328   (void) prefix_and_encode(dst->encoding(), src->encoding());
2329   emit_arith(0x0B, 0xC0, dst, src);
2330 }
2331 
2332 void Assembler::packuswb(XMMRegister dst, Address src) {
2333   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2334   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");




2335   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2336 }
2337 
2338 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2339   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2340   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2341 }
2342 
2343 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2344   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
2345   emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256);
2346 }
2347 
2348 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
2349   assert(VM_Version::supports_avx2(), "");
2350   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256);
2351   emit_int8(0x00);
2352   emit_int8(0xC0 | encode);
2353   emit_int8(imm8);
2354 }
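// Annotation (not part of the original source): each 2-bit field of imm8 selects
// a source qword for one destination qword (low bits select destination qword 0).
// For example vpermq(ymm1, ymm2, 0x4E) copies source qwords 2,3,0,1 and thus
// swaps the two 128-bit lanes.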
2355 
2356 void Assembler::pause() {
2357   emit_int8((unsigned char)0xF3);
2358   emit_int8((unsigned char)0x90);
2359 }
2360 
2361 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2362   assert(VM_Version::supports_sse4_2(), "");
2363   InstructionMark im(this);
2364   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2365   emit_int8(0x61);
2366   emit_operand(dst, src);
2367   emit_int8(imm8);
2368 }
2369 
2370 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2371   assert(VM_Version::supports_sse4_2(), "");
2372   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2373   emit_int8(0x61);
2374   emit_int8((unsigned char)(0xC0 | encode));
2375   emit_int8(imm8);
2376 }
2377 
2378 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
2379   assert(VM_Version::supports_sse4_1(), "");
2380   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
2381   emit_int8(0x16);
2382   emit_int8((unsigned char)(0xC0 | encode));
2383   emit_int8(imm8);
2384 }
2385 
2386 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
2387   assert(VM_Version::supports_sse4_1(), "");
2388   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
2389   emit_int8(0x16);
2390   emit_int8((unsigned char)(0xC0 | encode));
2391   emit_int8(imm8);
2392 }
2393 
2394 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
2395   assert(VM_Version::supports_sse4_1(), "");
2396   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
2397   emit_int8(0x22);
2398   emit_int8((unsigned char)(0xC0 | encode));
2399   emit_int8(imm8);
2400 }
2401 
2402 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
2403   assert(VM_Version::supports_sse4_1(), "");
2404   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
2405   emit_int8(0x22);
2406   emit_int8((unsigned char)(0xC0 | encode));
2407   emit_int8(imm8);
2408 }
2409 
2410 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
2411   assert(VM_Version::supports_sse4_1(), "");



2412   InstructionMark im(this);
2413   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2414   emit_int8(0x30);
2415   emit_operand(dst, src);
2416 }
2417 
2418 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2419   assert(VM_Version::supports_sse4_1(), "");
2420   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2421   emit_int8(0x30);
2422   emit_int8((unsigned char)(0xC0 | encode));
2423 }
2424 
2425 // generic
2426 void Assembler::pop(Register dst) {
2427   int encode = prefix_and_encode(dst->encoding());
2428   emit_int8(0x58 | encode);
2429 }
2430 
2431 void Assembler::popcntl(Register dst, Address src) {
2432   assert(VM_Version::supports_popcnt(), "must support");
2433   InstructionMark im(this);
2434   emit_int8((unsigned char)0xF3);
2435   prefix(src, dst);
2436   emit_int8(0x0F);
2437   emit_int8((unsigned char)0xB8);
2438   emit_operand(dst, src);
2439 }
2440 


2503   InstructionMark im(this);
2504   prefetch_prefix(src);
2505   emit_int8(0x18);
2506   emit_operand(rbx, src); // 3, src
2507 }
2508 
2509 void Assembler::prefetchw(Address src) {
2510   assert(VM_Version::supports_3dnow_prefetch(), "must support");
2511   InstructionMark im(this);
2512   prefetch_prefix(src);
2513   emit_int8(0x0D);
2514   emit_operand(rcx, src); // 1, src
2515 }
2516 
2517 void Assembler::prefix(Prefix p) {
2518   emit_int8(p);
2519 }
2520 
2521 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
2522   assert(VM_Version::supports_ssse3(), "");
2523   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2524   emit_int8(0x00);
2525   emit_int8((unsigned char)(0xC0 | encode));
2526 }
2527 
2528 void Assembler::pshufb(XMMRegister dst, Address src) {
2529   assert(VM_Version::supports_ssse3(), "");



2530   InstructionMark im(this);
2531   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2532   emit_int8(0x00);
2533   emit_operand(dst, src);
2534 }
2535 
2536 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2537   assert(isByte(mode), "invalid value");
2538   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2539   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
2540   emit_int8(mode & 0xFF);
2541 
2542 }
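// Annotation (not part of the original source): each 2-bit field of "mode"
// selects the source dword for one result dword (low bits select result dword 0).
// For example pshufd(xmm1, xmm2, 0x1B) reverses the four dwords and, in the
// legacy SSE2 form, is encoded as 66 0F 70 CA 1B.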
2543 
2544 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2545   assert(isByte(mode), "invalid value");
2546   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2547   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");




2548   InstructionMark im(this);
2549   simd_prefix(dst, src, VEX_SIMD_66);
2550   emit_int8(0x70);
2551   emit_operand(dst, src);
2552   emit_int8(mode & 0xFF);
2553 }
2554 
2555 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2556   assert(isByte(mode), "invalid value");
2557   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2558   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
2559   emit_int8(mode & 0xFF);
2560 }
2561 
2562 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2563   assert(isByte(mode), "invalid value");
2564   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2565   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");



2566   InstructionMark im(this);
2567   simd_prefix(dst, src, VEX_SIMD_F2);
2568   emit_int8(0x70);
2569   emit_operand(dst, src);
2570   emit_int8(mode & 0xFF);
2571 }
2572 
2573 void Assembler::psrldq(XMMRegister dst, int shift) {
2574   // Shift 128 bit value in xmm register by number of bytes.
2575   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2576   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
2577   emit_int8(0x73);
2578   emit_int8((unsigned char)(0xC0 | encode));
2579   emit_int8(shift);
2580 }
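// Annotation (not part of the original source): xmm3 above is not a real operand;
// its encoding (3) supplies the /3 opcode extension that selects PSRLDQ within
// the 0F 73 group. For example psrldq(xmm1, 8) emits 66 0F 73 D9 08 in the legacy
// SSE2 form and shifts the 128-bit value right by 8 bytes.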
2581 
2582 void Assembler::ptest(XMMRegister dst, Address src) {
2583   assert(VM_Version::supports_sse4_1(), "");
2584   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2585   InstructionMark im(this);
2586   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2587   emit_int8(0x17);
2588   emit_operand(dst, src);
2589 }
2590 
2591 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2592   assert(VM_Version::supports_sse4_1(), "");
2593   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2594   emit_int8(0x17);
2595   emit_int8((unsigned char)(0xC0 | encode));
2596 }
2597 
2598 void Assembler::vptest(XMMRegister dst, Address src) {
2599   assert(VM_Version::supports_avx(), "");
2600   InstructionMark im(this);
2601   bool vector256 = true;
2602   assert(dst != xnoreg, "sanity");
2603   int dst_enc = dst->encoding();
2604   // swap src<->dst for encoding
2605   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
2606   emit_int8(0x17);
2607   emit_operand(dst, src);
2608 }
2609 
2610 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
2611   assert(VM_Version::supports_avx(), "");
2612   bool vector256 = true;
2613   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
2614   emit_int8(0x17);
2615   emit_int8((unsigned char)(0xC0 | encode));
2616 }
2617 
2618 void Assembler::punpcklbw(XMMRegister dst, Address src) {
2619   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2620   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");



2621   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2622 }
2623 
2624 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2625   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2626   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2627 }
2628 
2629 void Assembler::punpckldq(XMMRegister dst, Address src) {
2630   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2631   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");




2632   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2633 }
2634 
2635 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
2636   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2637   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2638 }
2639 
2640 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
2641   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2642   emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
2643 }
2644 
2645 void Assembler::push(int32_t imm32) {
 2646   // in 64-bit mode we push 64 bits onto the stack but only
 2647   // take a 32-bit immediate, which is sign-extended
2648   emit_int8(0x68);
2649   emit_int32(imm32);
2650 }
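// Annotation (not part of the original source): for example push(0x12345678)
// emits
//   68 78 56 34 12
// and in 64-bit mode the immediate is sign-extended to 64 bits while rsp drops by 8.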
2651 


2821   assert(isShiftCount(imm8), "illegal shift count");
2822   int encode = prefix_and_encode(dst->encoding());
2823   emit_int8((unsigned char)0xC1);
2824   emit_int8((unsigned char)(0xE8 | encode));
2825   emit_int8(imm8);
2826 }
2827 
2828 void Assembler::shrl(Register dst) {
2829   int encode = prefix_and_encode(dst->encoding());
2830   emit_int8((unsigned char)0xD3);
2831   emit_int8((unsigned char)(0xE8 | encode));
2832 }
2833 
2834 // copies a single word from [esi] to [edi]
2835 void Assembler::smovl() {
2836   emit_int8((unsigned char)0xA5);
2837 }
2838 
2839 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2840   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



2841   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);

2842 }
2843 
2844 void Assembler::sqrtsd(XMMRegister dst, Address src) {
2845   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





2846   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);

2847 }
2848 
2849 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
2850   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2851   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2852 }
2853 
2854 void Assembler::std() {
2855   emit_int8((unsigned char)0xFD);
2856 }
2857 
2858 void Assembler::sqrtss(XMMRegister dst, Address src) {
2859   NOT_LP64(assert(VM_Version::supports_sse(), ""));




2860   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2861 }
2862 
2863 void Assembler::stmxcsr( Address dst) {
2864   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2865   InstructionMark im(this);
2866   prefix(dst);
2867   emit_int8(0x0F);
2868   emit_int8((unsigned char)0xAE);
2869   emit_operand(as_Register(3), dst);
2870 }
2871 
2872 void Assembler::subl(Address dst, int32_t imm32) {
2873   InstructionMark im(this);
2874   prefix(dst);
2875   emit_arith_operand(0x81, rbp, dst, imm32);
2876 }
2877 
2878 void Assembler::subl(Address dst, Register src) {
2879   InstructionMark im(this);


2890 // Force generation of a 4 byte immediate value even if it fits into 8bit
2891 void Assembler::subl_imm32(Register dst, int32_t imm32) {
2892   prefix(dst);
2893   emit_arith_imm32(0x81, 0xE8, dst, imm32);
2894 }
2895 
2896 void Assembler::subl(Register dst, Address src) {
2897   InstructionMark im(this);
2898   prefix(src, dst);
2899   emit_int8(0x2B);
2900   emit_operand(dst, src);
2901 }
2902 
2903 void Assembler::subl(Register dst, Register src) {
2904   (void) prefix_and_encode(dst->encoding(), src->encoding());
2905   emit_arith(0x2B, 0xC0, dst, src);
2906 }
2907 
2908 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2909   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



2910   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);

2911 }
2912 
2913 void Assembler::subsd(XMMRegister dst, Address src) {
2914   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2915   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);




2916 }
2917 
2918 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2919   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2920   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2921 }
2922 
2923 void Assembler::subss(XMMRegister dst, Address src) {
2924   NOT_LP64(assert(VM_Version::supports_sse(), ""));




2925   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2926 }
2927 
2928 void Assembler::testb(Register dst, int imm8) {
2929   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2930   (void) prefix_and_encode(dst->encoding(), true);
2931   emit_arith_b(0xF6, 0xC0, dst, imm8);
2932 }
2933 
2934 void Assembler::testl(Register dst, int32_t imm32) {
2935   // not using emit_arith because test
2936   // doesn't support sign-extension of
2937   // 8bit operands
2938   int encode = dst->encoding();
2939   if (encode == 0) {
2940     emit_int8((unsigned char)0xA9);
2941   } else {
2942     encode = prefix_and_encode(encode);
2943     emit_int8((unsigned char)0xF7);
2944     emit_int8((unsigned char)(0xC0 | encode));


2961 void Assembler::tzcntl(Register dst, Register src) {
2962   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2963   emit_int8((unsigned char)0xF3);
2964   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2965   emit_int8(0x0F);
2966   emit_int8((unsigned char)0xBC);
2967   emit_int8((unsigned char)0xC0 | encode);
2968 }
2969 
2970 void Assembler::tzcntq(Register dst, Register src) {
2971   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2972   emit_int8((unsigned char)0xF3);
2973   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2974   emit_int8(0x0F);
2975   emit_int8((unsigned char)0xBC);
2976   emit_int8((unsigned char)(0xC0 | encode));
2977 }
2978 
2979 void Assembler::ucomisd(XMMRegister dst, Address src) {
2980   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





2981   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);

2982 }
2983 
2984 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2985   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



2986   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);

2987 }
2988 
2989 void Assembler::ucomiss(XMMRegister dst, Address src) {
2990   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2991   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);




2992 }
2993 
2994 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2995   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2996   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
2997 }
2998 
2999 void Assembler::xabort(int8_t imm8) {
3000   emit_int8((unsigned char)0xC6);
3001   emit_int8((unsigned char)0xF8);
3002   emit_int8((unsigned char)(imm8 & 0xFF));
3003 }
3004 
3005 void Assembler::xaddl(Address dst, Register src) {
3006   InstructionMark im(this);
3007   prefix(dst, src);
3008   emit_int8(0x0F);
3009   emit_int8((unsigned char)0xC1);
3010   emit_operand(src, dst);
3011 }
3012 
3013 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3014   InstructionMark im(this);
3015   relocate(rtype);
3016   if (abort.is_bound()) {


3058   emit_arith(0x81, 0xF0, dst, imm32);
3059 }
3060 
3061 void Assembler::xorl(Register dst, Address src) {
3062   InstructionMark im(this);
3063   prefix(src, dst);
3064   emit_int8(0x33);
3065   emit_operand(dst, src);
3066 }
3067 
3068 void Assembler::xorl(Register dst, Register src) {
3069   (void) prefix_and_encode(dst->encoding(), src->encoding());
3070   emit_arith(0x33, 0xC0, dst, src);
3071 }
3072 
3073 
 3074 // AVX 3-operand scalar floating-point arithmetic instructions
3075 
3076 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3077   assert(VM_Version::supports_avx(), "");
3078   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);






3079 }
3080 
3081 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3082   assert(VM_Version::supports_avx(), "");
3083   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);




3084 }
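// Encoding sketch (annotation, not part of the original source): assuming the
// compact two-byte VEX prefix applies (no REX bits needed), vaddsd(xmm1, xmm2, xmm3)
// emits
//   C5 EB 58 CB              i.e. "vaddsd xmm1, xmm2, xmm3"
// where vvvv in the VEX byte holds the inverted encoding of nds (xmm2), giving the
// non-destructive three-operand form dst = nds op src.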
3085 
3086 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3087   assert(VM_Version::supports_avx(), "");
3088   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);




3089 }
3090 
3091 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3092   assert(VM_Version::supports_avx(), "");
3093   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3094 }
3095 
3096 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3097   assert(VM_Version::supports_avx(), "");
3098   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);






3099 }
3100 
3101 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3102   assert(VM_Version::supports_avx(), "");
3103   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);




3104 }
3105 
3106 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3107   assert(VM_Version::supports_avx(), "");
3108   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);




3109 }
3110 
3111 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3112   assert(VM_Version::supports_avx(), "");
3113   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3114 }
3115 
3116 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3117   assert(VM_Version::supports_avx(), "");
3118   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);






3119 }
3120 
3121 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3122   assert(VM_Version::supports_avx(), "");
3123   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);




3124 }
3125 
3126 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3127   assert(VM_Version::supports_avx(), "");
3128   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);




3129 }
3130 
3131 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3132   assert(VM_Version::supports_avx(), "");
3133   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3134 }
3135 
3136 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3137   assert(VM_Version::supports_avx(), "");
3138   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);






3139 }
3140 
3141 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3142   assert(VM_Version::supports_avx(), "");
3143   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);




3144 }
3145 
3146 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
3147   assert(VM_Version::supports_avx(), "");
3148   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);




3149 }
3150 
3151 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3152   assert(VM_Version::supports_avx(), "");
3153   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3154 }
3155 
3156 //====================VECTOR ARITHMETIC=====================================
3157 
 3158 // Floating-point vector arithmetic
3159 
3160 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
3161   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3162   emit_simd_arith(0x58, dst, src, VEX_SIMD_66);

3163 }
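// Annotation (not part of the original source): in the legacy SSE2 form this emits
// 66 0F 58 /r, e.g. addpd(xmm0, xmm1) -> 66 0F 58 C1, adding two packed doubles;
// with AVX enabled, emit_simd_arith may produce the equivalent VEX encoding instead.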
3164 
3165 void Assembler::addps(XMMRegister dst, XMMRegister src) {
3166   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3167   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
3168 }
3169 
3170 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3171   assert(VM_Version::supports_avx(), "");
3172   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);




3173 }
3174 
3175 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3176   assert(VM_Version::supports_avx(), "");
3177   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
3178 }
3179 
3180 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3181   assert(VM_Version::supports_avx(), "");
3182   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);






3183 }
3184 
3185 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3186   assert(VM_Version::supports_avx(), "");
3187   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);




3188 }
3189 
3190 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
3191   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3192   emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);

3193 }
3194 
3195 void Assembler::subps(XMMRegister dst, XMMRegister src) {
3196   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3197   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
3198 }
3199 
3200 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3201   assert(VM_Version::supports_avx(), "");
3202   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);




3203 }
3204 
3205 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3206   assert(VM_Version::supports_avx(), "");
3207   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3208 }
3209 
3210 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3211   assert(VM_Version::supports_avx(), "");
3212   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);






3213 }
3214 
3215 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3216   assert(VM_Version::supports_avx(), "");
3217   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);




3218 }
3219 
3220 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3221   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3222   emit_simd_arith(0x59, dst, src, VEX_SIMD_66);

3223 }
3224 
3225 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3226   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3227   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3228 }
3229 
3230 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3231   assert(VM_Version::supports_avx(), "");
3232   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);




3233 }
3234 
3235 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3236   assert(VM_Version::supports_avx(), "");
3237   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3238 }
3239 
3240 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3241   assert(VM_Version::supports_avx(), "");
3242   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);






3243 }
3244 
3245 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3246   assert(VM_Version::supports_avx(), "");
3247   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);




3248 }
3249 
3250 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
3251   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3252   emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);

3253 }
3254 
3255 void Assembler::divps(XMMRegister dst, XMMRegister src) {
3256   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3257   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
3258 }
3259 
3260 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3261   assert(VM_Version::supports_avx(), "");
3262   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);




3263 }
3264 
3265 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3266   assert(VM_Version::supports_avx(), "");
3267   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3268 }
3269 
3270 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3271   assert(VM_Version::supports_avx(), "");
3272   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);






3273 }
3274 
3275 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3276   assert(VM_Version::supports_avx(), "");
3277   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);




3278 }
3279 
3280 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
3281   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3282   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);

3283 }
3284 
3285 void Assembler::andps(XMMRegister dst, XMMRegister src) {
3286   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3287   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3288 }
3289 
3290 void Assembler::andps(XMMRegister dst, Address src) {
3291   NOT_LP64(assert(VM_Version::supports_sse(), ""));




3292   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3293 }
3294 
3295 void Assembler::andpd(XMMRegister dst, Address src) {
3296   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





3297   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);

3298 }
3299 
3300 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3301   assert(VM_Version::supports_avx(), "");
3302   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);




3303 }
3304 
3305 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3306   assert(VM_Version::supports_avx(), "");
3307   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3308 }
3309 
3310 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3311   assert(VM_Version::supports_avx(), "");
3312   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);






3313 }
3314 
3315 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3316   assert(VM_Version::supports_avx(), "");
3317   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);




3318 }
3319 
3320 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3321   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3322   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);

3323 }
3324 
3325 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3326   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3327   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3328 }
3329 
3330 void Assembler::xorpd(XMMRegister dst, Address src) {
3331   NOT_LP64(assert(VM_Version::supports_sse2(), ""));





3332   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);

3333 }
3334 
3335 void Assembler::xorps(XMMRegister dst, Address src) {
3336   NOT_LP64(assert(VM_Version::supports_sse(), ""));




3337   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3338 }
3339 
3340 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3341   assert(VM_Version::supports_avx(), "");
3342   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);




3343 }
3344 
3345 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3346   assert(VM_Version::supports_avx(), "");
3347   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3348 }
3349 
3350 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3351   assert(VM_Version::supports_avx(), "");
3352   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);






3353 }
3354 
3355 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3356   assert(VM_Version::supports_avx(), "");
3357   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);




3358 }
3359 
3360 
3361 // Integer vector arithmetic
3362 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3363   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3364   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3365   emit_int8(0x01);
3366   emit_int8((unsigned char)(0xC0 | encode));
3367 }
3368 
3369 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3370   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3371   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3372   emit_int8(0x02);
3373   emit_int8((unsigned char)(0xC0 | encode));
3374 }
3375 
3376 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
3377   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3378   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
3379 }
3380 
3381 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
3382   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3383   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
3384 }
3385 
3386 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
3387   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3388   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
3389 }
3390 
3391 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
3392   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3393   emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);

3394 }
3395 
3396 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
3397   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
3398   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
3399   emit_int8(0x01);
3400   emit_int8((unsigned char)(0xC0 | encode));
3401 }
3402 
3403 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
3404   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
3405   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
3406   emit_int8(0x02);
3407   emit_int8((unsigned char)(0xC0 | encode));
3408 }
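// Illustration only (a minimal sketch, not HotSpot code): what the PHADDD
// encoding above computes, assuming four 32-bit lanes per 128-bit operand --
// the pairwise horizontal sums of dst followed by the pairwise sums of src.
#include <cstdint>

static void phaddd_model(int32_t dst[4], const int32_t src[4]) {
  int32_t r[4] = { dst[0] + dst[1], dst[2] + dst[3],
                   src[0] + src[1], src[2] + src[3] };
  for (int i = 0; i < 4; i++) {
    dst[i] = r[i];
  }
}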
3409 
3410 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3411   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3412   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
3413 }
3414 
3415 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3416   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3417   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
3418 }
3419 
3420 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3421   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3422   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3423 }
3424 
3425 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3426   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3427   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);




3428 }
3429 
3430 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3431   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3432   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);



3433 }
3434 
3435 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3436   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3437   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);



3438 }
3439 
3440 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3441   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3442   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);




3443 }
3444 
3445 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3446   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3447   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);






3448 }
3449 
3450 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
3451   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3452   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
3453 }
3454 
3455 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
3456   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3457   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
3458 }
3459 
3460 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
3461   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3462   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
3463 }
3464 
3465 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
3466   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3467   emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);

3468 }
3469 
3470 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3471   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3472   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
3473 }
3474 
3475 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3476   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3477   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
3478 }
3479 
3480 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3481   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3482   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3483 }
3484 
3485 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3486   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3487   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);




3488 }
3489 
3490 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3491   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3492   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);



3493 }
3494 
3495 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3496   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3497   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);



3498 }
3499 
3500 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3501   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3502   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);




3503 }
3504 
3505 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3506   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3507   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);






3508 }
3509 
3510 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
3511   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3512   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
3513 }
3514 
3515 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
3516   assert(VM_Version::supports_sse4_1(), "");
3517   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
3518   emit_int8(0x40);
3519   emit_int8((unsigned char)(0xC0 | encode));
3520 }
3521 
3522 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3523   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3524   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
3525 }
3526 
3527 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3528   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3529   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3530   emit_int8(0x40);
3531   emit_int8((unsigned char)(0xC0 | encode));
3532 }
3533 
3534 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3535   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3536   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);



























3537 }
3538 
3539 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3540   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");




3541   InstructionMark im(this);
3542   int dst_enc = dst->encoding();
3543   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
3544   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
3545   emit_int8(0x40);
3546   emit_operand(dst, src);
3547 }
3548 
3549 // Shift packed integers left by specified number of bits.
3550 void Assembler::psllw(XMMRegister dst, int shift) {
3551   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3552   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3553   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3554   emit_int8(0x71);
3555   emit_int8((unsigned char)(0xC0 | encode));
3556   emit_int8(shift & 0xFF);
3557 }
3558 
3559 void Assembler::pslld(XMMRegister dst, int shift) {
3560   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3561   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3562   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3563   emit_int8(0x72);
3564   emit_int8((unsigned char)(0xC0 | encode));
3565   emit_int8(shift & 0xFF);
3566 }
3567 
3568 void Assembler::psllq(XMMRegister dst, int shift) {
3569   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3570   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3571   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3572   emit_int8(0x73);
3573   emit_int8((unsigned char)(0xC0 | encode));
3574   emit_int8(shift & 0xFF);
3575 }
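// Illustration only (a minimal sketch, not HotSpot code): the immediate shift
// forms above carry the "/6" opcode extension in the reg field of the ModRM
// byte, which is why xmm6 is passed as the first register argument. For
// example, "psllw xmm3, imm8" encodes as 66 0F 71 F3 ib.
#include <cstdint>
#include <cstdio>

static void show_psllw_modrm() {
  int dst_enc = 3;                                  // xmm3
  uint8_t modrm = 0xC0 | (6 << 3) | (dst_enc & 7);  // mod=11 (reg-direct), reg=/6, rm=dst
  printf("66 0F 71 %02X ib\n", modrm);              // prints "66 0F 71 F3 ib"
}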
3576 
3577 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
3578   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3579   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
3580 }
3581 
3582 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
3583   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3584   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
3585 }
3586 
3587 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
3588   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3589   emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);

3590 }
3591 
3592 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3593   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3594   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3595   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
3596   emit_int8(shift & 0xFF);
3597 }
3598 
3599 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3600   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3601   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3602   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
3603   emit_int8(shift & 0xFF);
3604 }
3605 
3606 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3607   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3608   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3609   emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);




3610   emit_int8(shift & 0xFF);
3611 }
3612 
3613 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3614   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3615   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
3616 }
3617 
3618 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3619   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3620   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
3621 }
3622 
3623 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3624   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3625   emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);




3626 }
3627 
3628 // Shift packed integers logically right by specified number of bits.
3629 void Assembler::psrlw(XMMRegister dst, int shift) {
3630   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3631   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3632   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3633   emit_int8(0x71);
3634   emit_int8((unsigned char)(0xC0 | encode));
3635   emit_int8(shift & 0xFF);
3636 }
3637 
3638 void Assembler::psrld(XMMRegister dst, int shift) {
3639   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3640   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3641   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3642   emit_int8(0x72);
3643   emit_int8((unsigned char)(0xC0 | encode));
3644   emit_int8(shift & 0xFF);
3645 }
3646 
3647 void Assembler::psrlq(XMMRegister dst, int shift) {
3648   // Do not confuse it with the psrldq SSE2 instruction, which
3649   // shifts a 128-bit value in an xmm register by a number of bytes.
3650   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3651   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3652   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);





3653   emit_int8(0x73);
3654   emit_int8((unsigned char)(0xC0 | encode));
3655   emit_int8(shift & 0xFF);
3656 }
3657 
3658 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
3659   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3660   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
3661 }
3662 
3663 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
3664   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3665   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
3666 }
3667 
3668 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
3669   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3670   emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);

3671 }
3672 
3673 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3674   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3675   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3676   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
3677   emit_int8(shift & 0xFF);
3678 }
3679 
3680 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3681   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3682   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3683   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
3684   emit_int8(shift & 0xFF);
3685 }
3686 
3687 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3688   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3689   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3690   emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);




3691   emit_int8(shift & 0xFF);
3692 }
3693 
3694 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3695   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3696   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
3697 }
3698 
3699 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3700   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3701   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
3702 }
3703 
3704 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3705   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3706   emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);




3707 }
3708 
3709 // Shift packed integers arithmetically right by specified number of bits.
3710 void Assembler::psraw(XMMRegister dst, int shift) {
3711   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3712   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3713   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3714   emit_int8(0x71);
3715   emit_int8((unsigned char)(0xC0 | encode));
3716   emit_int8(shift & 0xFF);
3717 }
3718 
3719 void Assembler::psrad(XMMRegister dst, int shift) {
3720   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3721   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3722   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3723   emit_int8(0x72);
3724   emit_int8((unsigned char)(0xC0 | encode));
3725   emit_int8(shift & 0xFF);
3726 }
3727 
3728 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
3729   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3730   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
3731 }
3732 
3733 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
3734   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3735   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
3736 }
3737 
3738 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3739   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3740   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3741   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
3742   emit_int8(shift & 0xFF);
3743 }
3744 
3745 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3746   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3747   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3748   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
3749   emit_int8(shift & 0xFF);
3750 }
3751 
3752 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3753   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3754   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
3755 }
3756 
3757 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3758   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3759   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
3760 }
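// Illustration only (a minimal sketch, not HotSpot code): the difference
// between the logical right shifts (PSRLW/D/Q, VPSRLW/D/Q) and the arithmetic
// right shifts (PSRAW/D, VPSRAW/D) emitted above, modeled on one 32-bit lane.
#include <cstdint>

static uint32_t srl32_model(uint32_t x, int s) {
  return x >> s;                                 // logical: vacated bits are zero-filled
}

static int32_t sra32_model(int32_t x, int s) {
  return x < 0 ? ~(~x >> s) : x >> s;            // arithmetic: vacated bits copy the sign bit
}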
3761 
3762 
3763 // AND packed integers
3764 void Assembler::pand(XMMRegister dst, XMMRegister src) {
3765   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3766   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
3767 }
3768 
3769 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3770   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3771   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3772 }
3773 
3774 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3775   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3776   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);




3777 }
3778 
3779 void Assembler::por(XMMRegister dst, XMMRegister src) {
3780   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3781   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
3782 }
3783 
3784 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3785   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3786   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3787 }
3788 
3789 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3790   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3791   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);




3792 }
3793 
3794 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
3795   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3796   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
3797 }
3798 
3799 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3800   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3801   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3802 }
3803 
3804 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3805   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3806   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);




3807 }
3808 
3809 
3810 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3811   assert(VM_Version::supports_avx(), "");
3812   bool vector256 = true;
3813   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3814   emit_int8(0x18);
3815   emit_int8((unsigned char)(0xC0 | encode));
3816   // 0x00 - insert into lower 128 bits
3817   // 0x01 - insert into upper 128 bits
3818   emit_int8(0x01);
3819 }
3820 











3821 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
3822   assert(VM_Version::supports_avx(), "");




3823   InstructionMark im(this);
3824   bool vector256 = true;
3825   assert(dst != xnoreg, "sanity");
3826   int dst_enc = dst->encoding();
3827   // swap src<->dst for encoding
3828   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3829   emit_int8(0x18);
3830   emit_operand(dst, src);
3831   // 0x01 - insert into upper 128 bits
3832   emit_int8(0x01);
3833 }
3834 
3835 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
3836   assert(VM_Version::supports_avx(), "");
3837   bool vector256 = true;
3838   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3839   emit_int8(0x19);
3840   emit_int8((unsigned char)(0xC0 | encode));
3841   // 0x00 - extract from lower 128 bits
3842   // 0x01 - extract from upper 128 bits
3843   emit_int8(0x01);
3844 }
3845 
3846 void Assembler::vextractf128h(Address dst, XMMRegister src) {
3847   assert(VM_Version::supports_avx(), "");




3848   InstructionMark im(this);
3849   bool vector256 = true;
3850   assert(src != xnoreg, "sanity");
3851   int src_enc = src->encoding();
3852   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3853   emit_int8(0x19);
3854   emit_operand(src, dst);
3855   // 0x01 - extract from upper 128 bits
3856   emit_int8(0x01);
3857 }
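// Illustration only (a minimal sketch, not HotSpot code): the trailing imm8
// emitted by the vinsertf128h/vextractf128h variants above selects the 128-bit
// lane of the 256-bit register (0x00 = lower, 0x01 = upper), modeled here on a
// 256-bit value held as four 64-bit words.
#include <cstdint>
#include <cstring>

struct Ymm256 { uint64_t w[4]; };

static void insert128_model(Ymm256& dst, const uint64_t src[2], int imm8) {
  std::memcpy(&dst.w[imm8 ? 2 : 0], src, 16);    // overwrite the selected lane
}

static void extract128_model(uint64_t out[2], const Ymm256& src, int imm8) {
  std::memcpy(out, &src.w[imm8 ? 2 : 0], 16);    // copy out the selected lane
}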
3858 
3859 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3860   assert(VM_Version::supports_avx2(), "");
3861   bool vector256 = true;
3862   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3863   emit_int8(0x38);
3864   emit_int8((unsigned char)(0xC0 | encode));
3865   // 0x00 - insert into lower 128 bits
3866   // 0x01 - insert into upper 128 bits
3867   emit_int8(0x01);
3868 }
3869 














3870 void Assembler::vinserti128h(XMMRegister dst, Address src) {
3871   assert(VM_Version::supports_avx2(), "");




3872   InstructionMark im(this);
3873   bool vector256 = true;
3874   assert(dst != xnoreg, "sanity");
3875   int dst_enc = dst->encoding();
3876   // swap src<->dst for encoding
3877   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3878   emit_int8(0x38);
3879   emit_operand(dst, src);
3880   // 0x01 - insert into upper 128 bits
3881   emit_int8(0x01);
3882 }
3883 











3884 void Assembler::vextracti128h(Address dst, XMMRegister src) {
3885   assert(VM_Version::supports_avx2(), "");




3886   InstructionMark im(this);
3887   bool vector256 = true;
3888   assert(src != xnoreg, "sanity");
3889   int src_enc = src->encoding();
3890   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3891   emit_int8(0x39);
3892   emit_operand(src, dst);
3893   // 0x01 - extract from upper 128 bits
3894   emit_int8(0x01);
3895 }
3896 


































































3897 // duplicate 4-byte integer data from src into 8 locations in dest
3898 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
3899   assert(VM_Version::supports_avx2(), "");
3900   bool vector256 = true;
3901   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);








3902   emit_int8(0x58);
3903   emit_int8((unsigned char)(0xC0 | encode));
3904 }
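// Illustration only (a minimal sketch, not HotSpot code): the effect of the
// VPBROADCASTD encoding above, modeled with eight plain 32-bit lanes.
#include <cstdint>

static void vpbroadcastd_model(uint32_t dst[8], uint32_t src) {
  for (int i = 0; i < 8; i++) {
    dst[i] = src;                                // same 4-byte value in every lane
  }
}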
3905 
3906 // Carry-Less Multiplication Quadword
3907 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
3908   assert(VM_Version::supports_clmul(), "");
3909   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);

3910   emit_int8(0x44);
3911   emit_int8((unsigned char)(0xC0 | encode));
3912   emit_int8((unsigned char)mask);
3913 }
3914 
3915 // Carry-Less Multiplication Quadword
3916 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
3917   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
3918   bool vector256 = false;
3919   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);

3920   emit_int8(0x44);
3921   emit_int8((unsigned char)(0xC0 | encode));
3922   emit_int8((unsigned char)mask);
3923 }
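// Illustration only (a minimal sketch, not HotSpot code): the 64x64 -> 128 bit
// carry-less (GF(2) polynomial) product computed by the PCLMULQDQ forms above;
// the imm8 mask selects which 64-bit halves of the two operands are multiplied.
#include <cstdint>

static void clmul64_model(uint64_t a, uint64_t b, uint64_t& lo, uint64_t& hi) {
  lo = 0; hi = 0;
  for (int i = 0; i < 64; i++) {
    if ((b >> i) & 1) {
      lo ^= a << i;                              // XOR instead of add: no carries
      if (i != 0) hi ^= a >> (64 - i);           // bits that overflow into the high half
    }
  }
}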
3924 
3925 void Assembler::vzeroupper() {
3926   assert(VM_Version::supports_avx(), "");


3927   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
3928   emit_int8(0x77);

3929 }
3930 
3931 
3932 #ifndef _LP64
3933 // 32bit only pieces of the assembler
3934 
3935 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3936   // NO PREFIX AS NEVER 64BIT
3937   InstructionMark im(this);
3938   emit_int8((unsigned char)0x81);
3939   emit_int8((unsigned char)(0xF8 | src1->encoding()));
3940   emit_data(imm32, rspec, 0);
3941 }
3942 
3943 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
3944   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
3945   InstructionMark im(this);
3946   emit_int8((unsigned char)0x81);
3947   emit_operand(rdi, src1);
3948   emit_data(imm32, rspec, 0);


4425   }
4426 }
4427 
4428 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4429   if (pre > 0) {
4430     emit_int8(simd_pre[pre]);
4431   }
4432   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
4433                           prefix_and_encode(dst_enc, src_enc);
4434   if (opc > 0) {
4435     emit_int8(0x0F);
4436     int opc2 = simd_opc[opc];
4437     if (opc2 > 0) {
4438       emit_int8(opc2);
4439     }
4440   }
4441   return encode;
4442 }
4443 
4444 
4445 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
4446   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
4447     prefix(VEX_3bytes);
4448 
4449     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
4450     byte1 = (~byte1) & 0xE0;
4451     byte1 |= opc;
4452     emit_int8(byte1);
4453 
4454     int byte2 = ((~nds_enc) & 0xf) << 3;
4455     byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
4456     emit_int8(byte2);
4457   } else {
4458     prefix(VEX_2bytes);
4459 
4460     int byte1 = vex_r ? VEX_R : 0;
4461     byte1 = (~byte1) & 0x80;
4462     byte1 |= ((~nds_enc) & 0xf) << 3;
4463     byte1 |= (vector256 ? 4 : 0) | pre;
4464     emit_int8(byte1);
4465   }
4466 }
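// Illustration only (a minimal stand-alone sketch, not HotSpot code): the byte
// layout produced by vex_prefix() above, assuming the usual VEX bit positions
// (VEX_R=0x80, VEX_X=0x40, VEX_B=0x20, VEX_W=0x80) and the standard pp/mmmmm
// field values (pre: 1=66, 2=F3, 3=F2; opc: 1=0F, 2=0F38, 3=0F3A).
#include <cstdint>
#include <cstdio>

static void vex_prefix_model(bool r, bool b, bool x, bool w,
                             int nds_enc, int pre, int opc, bool l256) {
  if (b || x || w || opc == 2 || opc == 3) {
    // 3-byte form: C4 [R X B mmmmm] [W vvvv L pp]  (R/X/B and vvvv stored inverted)
    uint8_t b1 = ((~((r ? 0x80 : 0) | (x ? 0x40 : 0) | (b ? 0x20 : 0))) & 0xE0) | opc;
    uint8_t b2 = (((~nds_enc) & 0xF) << 3) | (w ? 0x80 : 0) | (l256 ? 4 : 0) | pre;
    printf("C4 %02X %02X\n", b1, b2);
  } else {
    // 2-byte form: C5 [R vvvv L pp]
    uint8_t b1 = ((~(r ? 0x80 : 0)) & 0x80) | (((~nds_enc) & 0xF) << 3) | (l256 ? 4 : 0) | pre;
    printf("C5 %02X\n", b1);
  }
}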
4467 
4468 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){










































4469   bool vex_r = (xreg_enc >= 8);
4470   bool vex_b = adr.base_needs_rex();
4471   bool vex_x = adr.index_needs_rex();
4472   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);

















4473 }
4474 
4475 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {

4476   bool vex_r = (dst_enc >= 8);
4477   bool vex_b = (src_enc >= 8);
4478   bool vex_x = false;
4479   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);












4480   return (((dst_enc & 7) << 3) | (src_enc & 7));
4481 }
4482 
4483 
4484 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {

4485   if (UseAVX > 0) {
4486     int xreg_enc = xreg->encoding();
4487     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
4488     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
4489   } else {
4490     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
4491     rex_prefix(adr, xreg, pre, opc, rex_w);
4492   }
4493 }
4494 
4495 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {

4496   int dst_enc = dst->encoding();
4497   int src_enc = src->encoding();
4498   if (UseAVX > 0) {
4499     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4500     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
4501   } else {
4502     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
4503     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
4504   }
4505 }
4506 
4507 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
















4508   InstructionMark im(this);
4509   simd_prefix(dst, dst, src, pre);
4510   emit_int8(opcode);
4511   emit_operand(dst, src);
4512 }
4513 
4514 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4515   int encode = simd_prefix_and_encode(dst, dst, src, pre);













4516   emit_int8(opcode);
4517   emit_int8((unsigned char)(0xC0 | encode));
4518 }
4519 
4520 // Versions with no second source register (non-destructive source).
4521 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
4522   InstructionMark im(this);
4523   simd_prefix(dst, xnoreg, src, pre);
4524   emit_int8(opcode);
4525   emit_operand(dst, src);
4526 }
4527 
4528 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4529   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);













4530   emit_int8(opcode);
4531   emit_int8((unsigned char)(0xC0 | encode));
4532 }
4533 
4534 // 3-operands AVX instructions
4535 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4536                                Address src, VexSimdPrefix pre, bool vector256) {
4537   InstructionMark im(this);
4538   vex_prefix(dst, nds, src, pre, vector256);
4539   emit_int8(opcode);
4540   emit_operand(dst, src);
4541 }
4542 
4543 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4544                                XMMRegister src, VexSimdPrefix pre, bool vector256) {
4545   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);


















4546   emit_int8(opcode);
4547   emit_int8((unsigned char)(0xC0 | encode));
4548 }
4549 
4550 #ifndef _LP64
4551 
4552 void Assembler::incl(Register dst) {
4553   // Don't use it directly. Use MacroAssembler::incrementl() instead.
4554   emit_int8(0x40 | dst->encoding());
4555 }
4556 
4557 void Assembler::lea(Register dst, Address src) {
4558   leal(dst, src);
4559 }
4560 
4561 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
4562   InstructionMark im(this);
4563   emit_int8((unsigned char)0xC7);
4564   emit_operand(rax, dst);
4565   emit_data((int)imm32, rspec, 0);


5023 void Assembler::andq(Register dst, Address src) {
5024   InstructionMark im(this);
5025   prefixq(src, dst);
5026   emit_int8(0x23);
5027   emit_operand(dst, src);
5028 }
5029 
5030 void Assembler::andq(Register dst, Register src) {
5031   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5032   emit_arith(0x23, 0xC0, dst, src);
5033 }
5034 
5035 void Assembler::andnq(Register dst, Register src1, Register src2) {
5036   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5037   int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
5038   emit_int8((unsigned char)0xF2);
5039   emit_int8((unsigned char)(0xC0 | encode));
5040 }
5041 
5042 void Assembler::andnq(Register dst, Register src1, Address src2) {




5043   InstructionMark im(this);
5044   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5045   vex_prefix_0F38_q(dst, src1, src2);
5046   emit_int8((unsigned char)0xF2);
5047   emit_operand(dst, src2);
5048 }
5049 
5050 void Assembler::bsfq(Register dst, Register src) {
5051   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5052   emit_int8(0x0F);
5053   emit_int8((unsigned char)0xBC);
5054   emit_int8((unsigned char)(0xC0 | encode));
5055 }
5056 
5057 void Assembler::bsrq(Register dst, Register src) {
5058   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5059   emit_int8(0x0F);
5060   emit_int8((unsigned char)0xBD);
5061   emit_int8((unsigned char)(0xC0 | encode));
5062 }


5164   emit_arith(0x3B, 0xC0, dst, src);
5165 }
5166 
5167 void Assembler::cmpq(Register dst, Address  src) {
5168   InstructionMark im(this);
5169   prefixq(src, dst);
5170   emit_int8(0x3B);
5171   emit_operand(dst, src);
5172 }
5173 
5174 void Assembler::cmpxchgq(Register reg, Address adr) {
5175   InstructionMark im(this);
5176   prefixq(adr, reg);
5177   emit_int8(0x0F);
5178   emit_int8((unsigned char)0xB1);
5179   emit_operand(reg, adr);
5180 }
5181 
5182 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
5183   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5184   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
5185   emit_int8(0x2A);
5186   emit_int8((unsigned char)(0xC0 | encode));
5187 }
5188 
5189 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
5190   NOT_LP64(assert(VM_Version::supports_sse2(), ""));




5191   InstructionMark im(this);
5192   simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
5193   emit_int8(0x2A);
5194   emit_operand(dst, src);
5195 }
5196 
5197 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
5198   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5199   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
5200   emit_int8(0x2A);
5201   emit_int8((unsigned char)(0xC0 | encode));
5202 }
5203 
5204 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
5205   NOT_LP64(assert(VM_Version::supports_sse(), ""));




5206   InstructionMark im(this);
5207   simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
5208   emit_int8(0x2A);
5209   emit_operand(dst, src);
5210 }
5211 
5212 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
5213   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5214   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
5215   emit_int8(0x2C);
5216   emit_int8((unsigned char)(0xC0 | encode));
5217 }
5218 
5219 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
5220   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5221   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
5222   emit_int8(0x2C);
5223   emit_int8((unsigned char)(0xC0 | encode));
5224 }
5225 
5226 void Assembler::decl(Register dst) {
5227   // Don't use it directly. Use MacroAssembler::decrementl() instead.
5228   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
5229   int encode = prefix_and_encode(dst->encoding());
5230   emit_int8((unsigned char)0xFF);
5231   emit_int8((unsigned char)(0xC8 | encode));
5232 }
5233 
5234 void Assembler::decq(Register dst) {
5235   // Don't use it directly. Use MacroAssembler::decrementq() instead.
5236   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
5237   int encode = prefixq_and_encode(dst->encoding());
5238   emit_int8((unsigned char)0xFF);
5239   emit_int8(0xC8 | encode);
5240 }
5241 


5370 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
5371   InstructionMark im(this);
5372   prefix(src1);
5373   emit_int8((unsigned char)0x81);
5374   emit_operand(rax, src1, 4);
5375   emit_data((int)imm32, rspec, narrow_oop_operand);
5376 }
5377 
5378 void Assembler::lzcntq(Register dst, Register src) {
5379   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
5380   emit_int8((unsigned char)0xF3);
5381   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5382   emit_int8(0x0F);
5383   emit_int8((unsigned char)0xBD);
5384   emit_int8((unsigned char)(0xC0 | encode));
5385 }
5386 
5387 void Assembler::movdq(XMMRegister dst, Register src) {
5388   // table D-1 says MMX/SSE2
5389   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5390   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
5391   emit_int8(0x6E);
5392   emit_int8((unsigned char)(0xC0 | encode));
5393 }
5394 
5395 void Assembler::movdq(Register dst, XMMRegister src) {
5396   // table D-1 says MMX/SSE2
5397   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5398   // swap src/dst to get correct prefix
5399   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
5400   emit_int8(0x7E);
5401   emit_int8((unsigned char)(0xC0 | encode));
5402 }
5403 
5404 void Assembler::movq(Register dst, Register src) {
5405   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5406   emit_int8((unsigned char)0x8B);
5407   emit_int8((unsigned char)(0xC0 | encode));
5408 }
5409 
5410 void Assembler::movq(Register dst, Address src) {
5411   InstructionMark im(this);
5412   prefixq(src, dst);
5413   emit_int8((unsigned char)0x8B);
5414   emit_operand(dst, src);
5415 }
5416 
5417 void Assembler::movq(Address dst, Register src) {
5418   InstructionMark im(this);
5419   prefixq(dst, src);


5512   emit_int8((unsigned char)0x0F);
5513   emit_int8((unsigned char)0xB7);
5514   emit_int8((unsigned char)(0xC0 | encode));
5515 }
5516 
5517 void Assembler::mulq(Address src) {
5518   InstructionMark im(this);
5519   prefixq(src);
5520   emit_int8((unsigned char)0xF7);
5521   emit_operand(rsp, src);
5522 }
5523 
5524 void Assembler::mulq(Register src) {
5525   int encode = prefixq_and_encode(src->encoding());
5526   emit_int8((unsigned char)0xF7);
5527   emit_int8((unsigned char)(0xE0 | encode));
5528 }
5529 
5530 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
5531   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
5532   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, true, false);

5533   emit_int8((unsigned char)0xF6);
5534   emit_int8((unsigned char)(0xC0 | encode));
5535 }
5536 
5537 void Assembler::negq(Register dst) {
5538   int encode = prefixq_and_encode(dst->encoding());
5539   emit_int8((unsigned char)0xF7);
5540   emit_int8((unsigned char)(0xD8 | encode));
5541 }
5542 
5543 void Assembler::notq(Register dst) {
5544   int encode = prefixq_and_encode(dst->encoding());
5545   emit_int8((unsigned char)0xF7);
5546   emit_int8((unsigned char)(0xD0 | encode));
5547 }
5548 
5549 void Assembler::orq(Address dst, int32_t imm32) {
5550   InstructionMark im(this);
5551   prefixq(dst);
5552   emit_int8((unsigned char)0x81);


5661     emit_int8((unsigned char)(0xD0 | encode));
5662     emit_int8(imm8);
5663   }
5664 }
5665 
5666 void Assembler::rorq(Register dst, int imm8) {
5667   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5668   int encode = prefixq_and_encode(dst->encoding());
5669   if (imm8 == 1) {
5670     emit_int8((unsigned char)0xD1);
5671     emit_int8((unsigned char)(0xC8 | encode));
5672   } else {
5673     emit_int8((unsigned char)0xC1);
5674     emit_int8((unsigned char)(0xc8 | encode));
5675     emit_int8(imm8);
5676   }
5677 }
5678 
5679 void Assembler::rorxq(Register dst, Register src, int imm8) {
5680   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
5681   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, true, false);

5682   emit_int8((unsigned char)0xF0);
5683   emit_int8((unsigned char)(0xC0 | encode));
5684   emit_int8(imm8);
5685 }
5686 
5687 void Assembler::sarq(Register dst, int imm8) {
5688   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5689   int encode = prefixq_and_encode(dst->encoding());
5690   if (imm8 == 1) {
5691     emit_int8((unsigned char)0xD1);
5692     emit_int8((unsigned char)(0xF8 | encode));
5693   } else {
5694     emit_int8((unsigned char)0xC1);
5695     emit_int8((unsigned char)(0xF8 | encode));
5696     emit_int8(imm8);
5697   }
5698 }
5699 
5700 void Assembler::sarq(Register dst) {
5701   int encode = prefixq_and_encode(dst->encoding());




  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/stubRoutines.hpp"
  39 #include "utilities/macros.hpp"
  40 #if INCLUDE_ALL_GCS
  41 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  42 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
  43 #include "gc_implementation/g1/heapRegion.hpp"
  44 #endif // INCLUDE_ALL_GCS
  45 
  46 #ifdef PRODUCT
  47 #define BLOCK_COMMENT(str) /* nothing */
  48 #define STOP(error) stop(error)
  49 #else
  50 #define BLOCK_COMMENT(str) block_comment(str)
  51 #define STOP(error) block_comment(error); stop(error)
  52 #endif
  53 
  54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  55 // Implementation of AddressLiteral
  56 
  57 // A 2-D table for managing compressed displacement (disp8) on EVEX-enabled platforms.
  58 unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  59   // -----------------Table 4.5 -------------------- //
  60   16, 32, 64,  // EVEX_FV(0)
  61   4,  4,  4,   // EVEX_FV(1) - with Evex.b
  62   16, 32, 64,  // EVEX_FV(2) - with Evex.w
  63   8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  64   8,  16, 32,  // EVEX_HV(0)
  65   4,  4,  4,   // EVEX_HV(1) - with Evex.b
  66   // -----------------Table 4.6 -------------------- //
  67   16, 32, 64,  // EVEX_FVM(0)
  68   1,  1,  1,   // EVEX_T1S(0)
  69   2,  2,  2,   // EVEX_T1S(1)
  70   4,  4,  4,   // EVEX_T1S(2)
  71   8,  8,  8,   // EVEX_T1S(3)
  72   4,  4,  4,   // EVEX_T1F(0)
  73   8,  8,  8,   // EVEX_T1F(1)
  74   8,  8,  8,   // EVEX_T2(0)
  75   0,  16, 16,  // EVEX_T2(1)
  76   0,  16, 16,  // EVEX_T4(0)
  77   0,  0,  32,  // EVEX_T4(1)
  78   0,  0,  32,  // EVEX_T8(0)
  79   8,  16, 32,  // EVEX_HVM(0)
  80   4,  8,  16,  // EVEX_QVM(0)
  81   2,  4,  8,   // EVEX_OVM(0)
  82   16, 16, 16,  // EVEX_M128(0)
  83   8,  32, 64,  // EVEX_DUP(0)
  84   0,  0,  0    // EVEX_NTUP
  85 };
  86 
  87 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  88   _is_lval = false;
  89   _target = target;
  90   switch (rtype) {
  91   case relocInfo::oop_type:
  92   case relocInfo::metadata_type:
  93     // Oops are a special case. Normally they would be their own section
  94     // but in cases like icBuffer they are literals in the code stream that
  95     // we don't have a section for. We use none so that we get a literal address
  96     // which is always patchable.
  97     break;
  98   case relocInfo::external_word_type:
  99     _rspec = external_word_Relocation::spec(target);
 100     break;
 101   case relocInfo::internal_word_type:
 102     _rspec = internal_word_Relocation::spec(target);
 103     break;
 104   case relocInfo::opt_virtual_call_type:
 105     _rspec = opt_virtual_call_Relocation::spec();
 106     break;


 286   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 287   if (is8bit(imm32)) {
 288     emit_int8(op1 | 0x02); // set sign bit
 289     emit_operand(rm, adr, 1);
 290     emit_int8(imm32 & 0xFF);
 291   } else {
 292     emit_int8(op1);
 293     emit_operand(rm, adr, 4);
 294     emit_int32(imm32);
 295   }
 296 }
 297 
 298 
 299 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 300   assert(isByte(op1) && isByte(op2), "wrong opcode");
 301   emit_int8(op1);
 302   emit_int8(op2 | encode(dst) << 3 | encode(src));
 303 }
 304 
 305 
 306 bool Assembler::emit_compressed_disp_byte(int &disp) {
 307   int mod_idx = 0;
 308   // We will test if the displacement fits the compressed format and if so
 309   // apply the compression to the displacement iff the result fits in 8 bits.
 310   if (VM_Version::supports_evex() && is_evex_instruction) {
 311     switch (tuple_type) {
 312     case EVEX_FV:
 313       if ((evex_encoding & VEX_W) == VEX_W) {
 314         mod_idx += 2 + ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 315       } else {
 316         mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 317       }
 318       break;
 319 
 320     case EVEX_HV:
 321       mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 322       break;
 323 
 324     case EVEX_FVM:
 325       break;
 326 
 327     case EVEX_T1S:
 328       switch (input_size_in_bits) {
 329       case EVEX_8bit:
 330         break;
 331 
 332       case EVEX_16bit:
 333         mod_idx = 1;
 334         break;
 335 
 336       case EVEX_32bit:
 337         mod_idx = 2;
 338         break;
 339 
 340       case EVEX_64bit:
 341         mod_idx = 3;
 342         break;
 343       }
 344       break;
 345 
 346     case EVEX_T1F:
 347     case EVEX_T2:
 348     case EVEX_T4:
 349       mod_idx = (input_size_in_bits == EVEX_64bit) ? 1 : 0;
 350       break;
 351 
 352     case EVEX_T8:
 353       break;
 354 
 355     case EVEX_HVM:
 356       break;
 357 
 358     case EVEX_QVM:
 359       break;
 360 
 361     case EVEX_OVM:
 362       break;
 363 
 364     case EVEX_M128:
 365       break;
 366 
 367     case EVEX_DUP:
 368       break;
 369 
 370     default:
 371       assert(0, "no valid evex tuple_table entry");
 372       break;
 373     }
 374 
 375     if (avx_vector_len >= AVX_128bit && avx_vector_len <= AVX_512bit) {
 376       int disp_factor = tuple_table[tuple_type + mod_idx][avx_vector_len];
 377       if ((disp % disp_factor) == 0) {
 378         int new_disp = disp / disp_factor;
 379         if (is8bit(new_disp)) {
 380           disp = new_disp;
 381         }
 382       } else {
 383         return false;
 384       }
 385     }
 386   }
 387   return is8bit(disp);
 388 }
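// Illustration only (a minimal sketch, not HotSpot code): the EVEX "disp8*N"
// idea implemented above -- a displacement can use the one-byte form when it is
// an exact multiple of the tuple factor N (looked up in tuple_table) and the
// scaled value still fits in a signed byte; otherwise a full disp32 is emitted.
static bool compress_disp8_model(int& disp, int N) {
  if (N > 0 && (disp % N) == 0) {
    int scaled = disp / N;
    if (-128 <= scaled && scaled <= 127) {
      disp = scaled;                             // store the scaled disp8
      return true;
    }
  }
  return false;                                  // caller falls back to disp32
}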
 389 
 390 
 391 void Assembler::emit_operand(Register reg, Register base, Register index,
 392                              Address::ScaleFactor scale, int disp,
 393                              RelocationHolder const& rspec,
 394                              int rip_relative_correction) {
 395   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
 396 
 397   // Encode the registers as needed in the fields they are used in
 398 
 399   int regenc = encode(reg) << 3;
 400   int indexenc = index->is_valid() ? encode(index) << 3 : 0;
 401   int baseenc = base->is_valid() ? encode(base) : 0;
 402 
 403   if (base->is_valid()) {
 404     if (index->is_valid()) {
 405       assert(scale != Address::no_scale, "inconsistent address");
 406       // [base + index*scale + disp]
 407       if (disp == 0 && rtype == relocInfo::none  &&
 408           base != rbp LP64_ONLY(&& base != r13)) {
 409         // [base + index*scale]
 410         // [00 reg 100][ss index base]
 411         assert(index != rsp, "illegal addressing mode");
 412         emit_int8(0x04 | regenc);
 413         emit_int8(scale << 6 | indexenc | baseenc);
 414       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 415         // [base + index*scale + imm8]
 416         // [01 reg 100][ss index base] imm8
 417         assert(index != rsp, "illegal addressing mode");
 418         emit_int8(0x44 | regenc);
 419         emit_int8(scale << 6 | indexenc | baseenc);
 420         emit_int8(disp & 0xFF);
 421       } else {
 422         // [base + index*scale + disp32]
 423         // [10 reg 100][ss index base] disp32
 424         assert(index != rsp, "illegal addressing mode");
 425         emit_int8(0x84 | regenc);
 426         emit_int8(scale << 6 | indexenc | baseenc);
 427         emit_data(disp, rspec, disp32_operand);
 428       }
 429     } else if (base == rsp LP64_ONLY(|| base == r12)) {
 430       // [rsp + disp]
 431       if (disp == 0 && rtype == relocInfo::none) {
 432         // [rsp]
 433         // [00 reg 100][00 100 100]
 434         emit_int8(0x04 | regenc);
 435         emit_int8(0x24);
 436       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 437         // [rsp + imm8]
 438         // [01 reg 100][00 100 100] disp8
 439         emit_int8(0x44 | regenc);
 440         emit_int8(0x24);
 441         emit_int8(disp & 0xFF);
 442       } else {
 443         // [rsp + imm32]
 444         // [10 reg 100][00 100 100] disp32
 445         emit_int8(0x84 | regenc);
 446         emit_int8(0x24);
 447         emit_data(disp, rspec, disp32_operand);
 448       }
 449     } else {
 450       // [base + disp]
 451       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
 452       if (disp == 0 && rtype == relocInfo::none &&
 453           base != rbp LP64_ONLY(&& base != r13)) {
 454         // [base]
 455         // [00 reg base]
 456         emit_int8(0x00 | regenc | baseenc);
 457       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 458         // [base + disp8]
 459         // [01 reg base] disp8
 460         emit_int8(0x40 | regenc | baseenc);
 461         emit_int8(disp & 0xFF);
 462       } else {
 463         // [base + disp32]
 464         // [10 reg base] disp32
 465         emit_int8(0x80 | regenc | baseenc);
 466         emit_data(disp, rspec, disp32_operand);
 467       }
 468     }
 469   } else {
 470     if (index->is_valid()) {
 471       assert(scale != Address::no_scale, "inconsistent address");
 472       // [index*scale + disp]
 473       // [00 reg 100][ss index 101] disp32
 474       assert(index != rsp, "illegal addressing mode");
 475       emit_int8(0x04 | regenc);
 476       emit_int8(scale << 6 | indexenc | 0x05);
 477       emit_data(disp, rspec, disp32_operand);


 487       // at the start of the instruction. That needs more correction here.
 488       // intptr_t disp = target - next_ip;
 489       assert(inst_mark() != NULL, "must be inside InstructionMark");
 490       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
 491       int64_t adjusted = disp;
 492       // Do rip-rel adjustment for 64bit
 493       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
 494       assert(is_simm32(adjusted),
 495              "must be 32bit offset (RIP relative address)");
 496       emit_data((int32_t) adjusted, rspec, disp32_operand);
 497 
 498     } else {
 499       // 32-bit never did this; it handled everything via the rip-rel/disp code above
 500       // [disp] ABSOLUTE
 501       // [00 reg 100][00 100 101] disp32
 502       emit_int8(0x04 | regenc);
 503       emit_int8(0x25);
 504       emit_data(disp, rspec, disp32_operand);
 505     }
 506   }
 507   is_evex_instruction = false;
 508 }
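     // Annotation: a worked example of the encodings above. Emitting the operand
     // rcx, [rax + rbx*4 + 16] takes the [base + index*scale + imm8] path and produces
     //   ModRM = 0x4C  (mod=01, reg=rcx(001), rm=100 -> SIB byte follows)
     //   SIB   = 0x98  (ss=10 for scale 4, index=rbx(011), base=rax(000))
     //   disp8 = 0x10
     // On 64-bit, the no-base/no-index case instead stores target - next_ip in the
     // disp32 field (RIP-relative), with next_ip adjusted for any trailing immediate
     // via rip_relative_correction.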
 509 
 510 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 511                              Address::ScaleFactor scale, int disp,
 512                              RelocationHolder const& rspec) {
 513   if (UseAVX > 2) {
 514     int xreg_enc = reg->encoding();
 515     if (xreg_enc > 15) {
 516       XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
 517       emit_operand((Register)new_reg, base, index, scale, disp, rspec);
 518       return;
 519     }
 520   }
 521   emit_operand((Register)reg, base, index, scale, disp, rspec);
 522 }
 523 
 524 // Secret local extension to Assembler::WhichOperand:
 525 #define end_pc_operand (_WhichOperand_limit)
 526 
 527 address Assembler::locate_operand(address inst, WhichOperand which) {
 528   // Decode the given instruction, and return the address of
 529   // an embedded 32-bit operand word.
 530 
 531   // If "which" is disp32_operand, selects the displacement portion
 532   // of an effective address specifier.
 533   // If "which" is imm64_operand, selects the trailing immediate constant.
 534   // If "which" is call32_operand, selects the displacement of a call or jump.
 535   // Caller is responsible for ensuring that there is such an operand,
 536   // and that it is 32/64 bits wide.
 537 
 538   // If "which" is end_pc_operand, find the end of the instruction.
 539 
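     // Annotation: as a usage sketch, for a plain 5-byte call (0xE8 disp32) with no
     // prefixes, which == call32_operand returns inst + 1 (the address of the
     // 32-bit displacement) and which == end_pc_operand returns inst + 5; see the
     // 0xE8/0xE9 case below.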
 540   address ip = inst;


 793     // First byte
 794     if ((0xFF & *inst) == VEX_3bytes) {
 795       ip++; // third byte
 796       is_64bit = ((VEX_W & *ip) == VEX_W);
 797     }
 798     ip++; // opcode
 799     // To find the end of instruction (which == end_pc_operand).
 800     switch (0xFF & *ip) {
 801     case 0x61: // pcmpestri r, r/a, #8
 802     case 0x70: // pshufd r, r/a, #8
 803     case 0x73: // psrldq r, #8
 804       tail_size = 1;  // the imm8
 805       break;
 806     default:
 807       break;
 808     }
 809     ip++; // skip opcode
 810     debug_only(has_disp32 = true); // has both kinds of operands!
 811     break;
 812 
 813   case 0x62: // EVEX_4bytes
 814     assert((UseAVX > 0), "shouldn't have EVEX prefix");
 815     assert(ip == inst+1, "no prefixes allowed");
 816     // no EVEX collisions, all instructions that have 0x62 opcodes
 817     // have EVEX versions and are subopcodes of 0x66
 818     ip++; // skip P0 and examine W in P1
 819     is_64bit = ((VEX_W & *ip) == VEX_W);
 820     ip++; // move to P2
 821     ip++; // skip P2, move to opcode
 822     // To find the end of instruction (which == end_pc_operand).
 823     switch (0xFF & *ip) {
 824     case 0x61: // pcmpestri r, r/a, #8
 825     case 0x70: // pshufd r, r/a, #8
 826     case 0x73: // psrldq r, #8
 827       tail_size = 1;  // the imm8
 828       break;
 829     default:
 830       break;
 831     }
 832     ip++; // skip opcode
 833     debug_only(has_disp32 = true); // has both kinds of operands!
 834     break;
 835 
 836   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 837   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 838   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 839   case 0xDD: // fld_d a; fst_d a; fstp_d a
 840   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 841   case 0xDF: // fild_d a; fistp_d a
 842   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 843   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 844   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 845     debug_only(has_disp32 = true);
 846     break;
 847 
 848   case 0xE8: // call rdisp32
 849   case 0xE9: // jmp  rdisp32
 850     if (which == end_pc_operand)  return ip + 4;
 851     assert(which == call32_operand, "call has no disp32 or imm");
 852     return ip;
 853 
 854   case 0xF0:                    // Lock
 855     assert(os::is_MP(), "only on MP");


1115   emit_int8(0x0F);
1116   emit_int8(0x1F);
1117   emit_int8((unsigned char)0x80);
1118                    // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
1119   emit_int32(0);   // 32-bits offset (4 bytes)
1120 }
1121 
1122 void Assembler::addr_nop_8() {
1123   assert(UseAddressNop, "no CPU support");
1124   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
1125   emit_int8(0x0F);
1126   emit_int8(0x1F);
1127   emit_int8((unsigned char)0x84);
1128                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
1129   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1130   emit_int32(0);   // 32-bits offset (4 bytes)
1131 }
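     // Annotation: the bytes emitted above are 0F 1F 84 00 00 00 00 00 (ModRM 0x84,
     // a zero SIB byte, and a 4-byte zero displacement); addr_nop_7 likewise emits
     // 0F 1F 80 00 00 00 00, the same form without the SIB byte.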
1132 
1133 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
1134   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1135   if (VM_Version::supports_evex()) {
1136     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
1137   } else {
1138     emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
1139   }
1140 }
1141 
1142 void Assembler::addsd(XMMRegister dst, Address src) {
1143   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1144   if (VM_Version::supports_evex()) {
1145     tuple_type = EVEX_T1S;
1146     input_size_in_bits = EVEX_64bit;
1147     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
1148   } else {
1149     emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
1150   }
1151 }
1152 
1153 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1154   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1155   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
1156 }
1157 
1158 void Assembler::addss(XMMRegister dst, Address src) {
1159   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1160   if (VM_Version::supports_evex()) {
1161     tuple_type = EVEX_T1S;
1162     input_size_in_bits = EVEX_32bit;
1163   }
1164   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
1165 }
1166 
1167 void Assembler::aesdec(XMMRegister dst, Address src) {
1168   assert(VM_Version::supports_aes(), "");
1169   InstructionMark im(this);
1170   simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
1171   emit_int8((unsigned char)0xDE);
1172   emit_operand(dst, src);
1173 }
1174 
1175 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1176   assert(VM_Version::supports_aes(), "");
1177   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
1178   emit_int8((unsigned char)0xDE);
1179   emit_int8(0xC0 | encode);
1180 }
1181 
1182 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1183   assert(VM_Version::supports_aes(), "");
1184   InstructionMark im(this);
1185   simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
1186   emit_int8((unsigned char)0xDF);
1187   emit_operand(dst, src);
1188 }
1189 
1190 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1191   assert(VM_Version::supports_aes(), "");
1192   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
1193   emit_int8((unsigned char)0xDF);
1194   emit_int8((unsigned char)(0xC0 | encode));
1195 }
1196 
1197 void Assembler::aesenc(XMMRegister dst, Address src) {
1198   assert(VM_Version::supports_aes(), "");
1199   InstructionMark im(this);
1200   simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
1201   emit_int8((unsigned char)0xDC);
1202   emit_operand(dst, src);
1203 }
1204 
1205 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1206   assert(VM_Version::supports_aes(), "");
1207   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
1208   emit_int8((unsigned char)0xDC);
1209   emit_int8(0xC0 | encode);
1210 }
1211 
1212 void Assembler::aesenclast(XMMRegister dst, Address src) {
1213   assert(VM_Version::supports_aes(), "");
1214   InstructionMark im(this);
1215   simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
1216   emit_int8((unsigned char)0xDD);
1217   emit_operand(dst, src);
1218 }
1219 
1220 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1221   assert(VM_Version::supports_aes(), "");
1222   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
1223   emit_int8((unsigned char)0xDD);
1224   emit_int8((unsigned char)(0xC0 | encode));
1225 }
1226 
1227 
1228 void Assembler::andl(Address dst, int32_t imm32) {
1229   InstructionMark im(this);
1230   prefix(dst);
1231   emit_int8((unsigned char)0x81);
1232   emit_operand(rsp, dst, 4);
1233   emit_int32(imm32);
1234 }
1235 
1236 void Assembler::andl(Register dst, int32_t imm32) {
1237   prefix(dst);
1238   emit_arith(0x81, 0xE0, dst, imm32);
1239 }
1240 
1241 void Assembler::andl(Register dst, Address src) {
1242   InstructionMark im(this);
1243   prefix(src, dst);
1244   emit_int8(0x23);
1245   emit_operand(dst, src);
1246 }
1247 
1248 void Assembler::andl(Register dst, Register src) {
1249   (void) prefix_and_encode(dst->encoding(), src->encoding());
1250   emit_arith(0x23, 0xC0, dst, src);
1251 }
1252 
1253 void Assembler::andnl(Register dst, Register src1, Register src2) {
1254   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1255   int encode = vex_prefix_0F38_and_encode(dst, src1, src2, false);
1256   emit_int8((unsigned char)0xF2);
1257   emit_int8((unsigned char)(0xC0 | encode));
1258 }
1259 
1260 void Assembler::andnl(Register dst, Register src1, Address src2) {
1261   InstructionMark im(this);
1262   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1263   vex_prefix_0F38(dst, src1, src2, false);
1264   emit_int8((unsigned char)0xF2);
1265   emit_operand(dst, src2);
1266 }
1267 
1268 void Assembler::bsfl(Register dst, Register src) {
1269   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1270   emit_int8(0x0F);
1271   emit_int8((unsigned char)0xBC);
1272   emit_int8((unsigned char)(0xC0 | encode));
1273 }
1274 
1275 void Assembler::bsrl(Register dst, Register src) {
1276   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1277   emit_int8(0x0F);
1278   emit_int8((unsigned char)0xBD);
1279   emit_int8((unsigned char)(0xC0 | encode));
1280 }
1281 
1282 void Assembler::bswapl(Register reg) { // bswap
1283   int encode = prefix_and_encode(reg->encoding());
1284   emit_int8(0x0F);
1285   emit_int8((unsigned char)(0xC8 | encode));
1286 }
1287 
1288 void Assembler::blsil(Register dst, Register src) {
1289   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1290   int encode = vex_prefix_0F38_and_encode(rbx, dst, src, false);
1291   emit_int8((unsigned char)0xF3);
1292   emit_int8((unsigned char)(0xC0 | encode));
1293 }
1294 
1295 void Assembler::blsil(Register dst, Address src) {
1296   InstructionMark im(this);
1297   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1298   vex_prefix_0F38(rbx, dst, src, false);
1299   emit_int8((unsigned char)0xF3);
1300   emit_operand(rbx, src);
1301 }
1302 
1303 void Assembler::blsmskl(Register dst, Register src) {
1304   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1305   int encode = vex_prefix_0F38_and_encode(rdx, dst, src, false);
1306   emit_int8((unsigned char)0xF3);
1307   emit_int8((unsigned char)(0xC0 | encode));
1308 }
1309 
1310 void Assembler::blsmskl(Register dst, Address src) {
1311   InstructionMark im(this);
1312   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1313   vex_prefix_0F38(rdx, dst, src, false);
1314   emit_int8((unsigned char)0xF3);
1315   emit_operand(rdx, src);
1316 }
1317 
1318 void Assembler::blsrl(Register dst, Register src) {
1319   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1320   int encode = vex_prefix_0F38_and_encode(rcx, dst, src, false);
1321   emit_int8((unsigned char)0xF3);
1322   emit_int8((unsigned char)(0xC0 | encode));
1323 }
1324 
1325 void Assembler::blsrl(Register dst, Address src) {
1326   InstructionMark im(this);
1327   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1328   vex_prefix_0F38(rcx, dst, src, false);
1329   emit_int8((unsigned char)0xF3);
1330   emit_operand(rcx, src);
1331 }
1332 
1333 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1334   // suspect disp32 is always good
1335   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1336 
1337   if (L.is_bound()) {
1338     const int long_size = 5;
1339     int offs = (int)( target(L) - pc() );
1340     assert(offs <= 0, "assembler error");
1341     InstructionMark im(this);
1342     // 1110 1000 #32-bit disp
1343     emit_int8((unsigned char)0xE8);
1344     emit_data(offs - long_size, rtype, operand);
1345   } else {
1346     InstructionMark im(this);
1347     // 1110 1000 #32-bit disp
1348     L.add_patch_at(code(), locator());


1456   emit_int8(0x0F);
1457   emit_int8((unsigned char)0xB1);
1458   emit_operand(reg, adr);
1459 }
1460 
1461 // The 8-bit cmpxchg compares the value at adr with the contents of rax,
1462 // and stores reg into adr if they are equal; otherwise, the value at adr is loaded into rax.
1463 // The ZF is set if the compared values were equal, and cleared otherwise.
1464 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1465   InstructionMark im(this);
1466   prefix(adr, reg, true);
1467   emit_int8(0x0F);
1468   emit_int8((unsigned char)0xB0);
1469   emit_operand(reg, adr);
1470 }
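     // Usage sketch (annotation): callers load the expected value into rax, place
     // the new value in reg, and emit a lock prefix immediately before this
     // instruction when the compare-and-exchange must be atomic with respect to
     // other processors.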
1471 
1472 void Assembler::comisd(XMMRegister dst, Address src) {
1473   // NOTE: dbx seems to decode this as comiss even though the
1474   // 0x66 is there. Strangely, ucomisd comes out correct.
1475   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1476   if (VM_Version::supports_evex()) {
1477     tuple_type = EVEX_T1S;
1478     input_size_in_bits = EVEX_64bit;
1479     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
1480   } else {
1481     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1482   }
1483 }
1484 
1485 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1486   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1487   if (VM_Version::supports_evex()) {
1488     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
1489   } else {
1490     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1491   }
1492 }
1493 
1494 void Assembler::comiss(XMMRegister dst, Address src) {
1495   if (VM_Version::supports_evex()) {
1496     tuple_type = EVEX_T1S;
1497     input_size_in_bits = EVEX_32bit;
1498   }
1499   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1500   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
1501 }
1502 
1503 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1504   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1505   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
1506 }
1507 
1508 void Assembler::cpuid() {
1509   emit_int8(0x0F);
1510   emit_int8((unsigned char)0xA2);
1511 }
1512 
1513 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1514   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1515   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
1516 }
1517 
1518 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1519   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1520   emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
1521 }
1522 
1523 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1524   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1525   if (VM_Version::supports_evex()) {
1526     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1527   } else {
1528     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1529   }
1530 }
1531 
1532 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1533   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1534   if (VM_Version::supports_evex()) {
1535     tuple_type = EVEX_T1F;
1536     input_size_in_bits = EVEX_64bit;
1537     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1538   } else {
1539     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1540   }
1541 }
1542 
1543 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1544   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1545   int encode = 0;
1546   if (VM_Version::supports_evex()) {
1547     encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
1548   } else {
1549     encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, false);
1550   }
1551   emit_int8(0x2A);
1552   emit_int8((unsigned char)(0xC0 | encode));
1553 }
1554 
1555 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1556   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1557   if (VM_Version::supports_evex()) {
1558     tuple_type = EVEX_T1S;
1559     input_size_in_bits = EVEX_32bit;
1560     emit_simd_arith_q(0x2A, dst, src, VEX_SIMD_F2, true);
1561   } else {
1562     emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
1563   }
1564 }
1565 
1566 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1567   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1568   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, true);
1569   emit_int8(0x2A);
1570   emit_int8((unsigned char)(0xC0 | encode));
1571 }
1572 
1573 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1574   if (VM_Version::supports_evex()) {
1575     tuple_type = EVEX_T1S;
1576     input_size_in_bits = EVEX_32bit;
1577   }
1578   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1579   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, true);
1580 }
1581 
1582 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1583   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1584   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1585 }
1586 
1587 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1588   if (VM_Version::supports_evex()) {
1589     tuple_type = EVEX_T1S;
1590     input_size_in_bits = EVEX_32bit;
1591   }
1592   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1593   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1594 }
1595 
1596 
1597 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1598   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1599   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
1600   emit_int8(0x2C);
1601   emit_int8((unsigned char)(0xC0 | encode));
1602 }
1603 
1604 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1605   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1606   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
1607   emit_int8(0x2C);
1608   emit_int8((unsigned char)(0xC0 | encode));
1609 }
1610 
1611 void Assembler::decl(Address dst) {
1612   // Don't use it directly. Use MacroAssembler::decrement() instead.
1613   InstructionMark im(this);
1614   prefix(dst);
1615   emit_int8((unsigned char)0xFF);
1616   emit_operand(rcx, dst);
1617 }
1618 
1619 void Assembler::divsd(XMMRegister dst, Address src) {
1620   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1621   if (VM_Version::supports_evex()) {
1622     tuple_type = EVEX_T1S;
1623     input_size_in_bits = EVEX_64bit;
1624     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1625   } else {
1626     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1627   }
1628 }
1629 
1630 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1631   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1632   if (VM_Version::supports_evex()) {
1633     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1634   } else {
1635     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1636   }
1637 }
1638 
1639 void Assembler::divss(XMMRegister dst, Address src) {
1640   if (VM_Version::supports_evex()) {
1641     tuple_type = EVEX_T1S;
1642     input_size_in_bits = EVEX_32bit;
1643   }
1644   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1645   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1646 }
1647 
1648 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1649   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1650   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1651 }
1652 
1653 void Assembler::emms() {
1654   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1655   emit_int8(0x0F);
1656   emit_int8(0x77);
1657 }
1658 
1659 void Assembler::hlt() {
1660   emit_int8((unsigned char)0xF4);
1661 }
1662 
1663 void Assembler::idivl(Register src) {


1876   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1877   emit_int8(0x0F);
1878   emit_int8((unsigned char)0xBD);
1879   emit_int8((unsigned char)(0xC0 | encode));
1880 }
1881 
1882 // Emit mfence instruction
1883 void Assembler::mfence() {
1884   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1885   emit_int8(0x0F);
1886   emit_int8((unsigned char)0xAE);
1887   emit_int8((unsigned char)0xF0);
1888 }
1889 
1890 void Assembler::mov(Register dst, Register src) {
1891   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1892 }
1893 
1894 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1895   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1896   if (VM_Version::supports_evex()) {
1897     emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66, true);
1898   }
1899   else {
1900     emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
1901   }
1902 }
1903 
1904 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1905   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1906   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
1907 }
1908 
1909 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
1910   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1911   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, true, VEX_OPCODE_0F,
1912                                       false, AVX_128bit);
1913   emit_int8(0x16);
1914   emit_int8((unsigned char)(0xC0 | encode));
1915 }
1916 
1917 void Assembler::movb(Register dst, Address src) {
1918   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1919   InstructionMark im(this);
1920   prefix(src, dst, true);
1921   emit_int8((unsigned char)0x8A);
1922   emit_operand(dst, src);
1923 }
1924 
1925 void Assembler::kmovq(KRegister dst, KRegister src) {
1926   NOT_LP64(assert(VM_Version::supports_evex(), ""));
1927   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, true, VEX_OPCODE_0F, true);
1928   emit_int8((unsigned char)0x90);
1929   emit_int8((unsigned char)(0xC0 | encode));
1930 }
1931 
1932 void Assembler::kmovq(KRegister dst, Address src) {
1933   NOT_LP64(assert(VM_Version::supports_evex(), ""));
1934   int dst_enc = dst->encoding();
1935   int nds_enc = 0;
1936   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE, VEX_OPCODE_0F, true, AVX_128bit, true, true);
1937   emit_int8((unsigned char)0x90);
1938   emit_operand((Register)dst, src);
1939 }
1940 
1941 void Assembler::kmovq(Address dst, KRegister src) {
1942   NOT_LP64(assert(VM_Version::supports_evex(), ""));
1943   int src_enc = src->encoding();
1944   int nds_enc = 0;
1945   vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE, VEX_OPCODE_0F, true, AVX_128bit, true, true);
1946   emit_int8((unsigned char)0x90);
1947   emit_operand((Register)src, dst);
1948 }
1949 
1950 void Assembler::kmovql(KRegister dst, Register src) {
1951   NOT_LP64(assert(VM_Version::supports_evex(), ""));
1952   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_F2, true, VEX_OPCODE_0F, true);
1953   emit_int8((unsigned char)0x92);
1954   emit_int8((unsigned char)(0xC0 | encode));
1955 }
1956 
1957 void Assembler::movb(Address dst, int imm8) {
1958   InstructionMark im(this);
1959   prefix(dst);
1960   emit_int8((unsigned char)0xC6);
1961   emit_operand(rax, dst, 1);
1962   emit_int8(imm8);
1963 }
1964 
1965 
1966 void Assembler::movb(Address dst, Register src) {
1967   assert(src->has_byte_register(), "must have byte register");
1968   InstructionMark im(this);
1969   prefix(dst, src, true);
1970   emit_int8((unsigned char)0x88);
1971   emit_operand(src, dst);
1972 }
1973 
1974 void Assembler::movdl(XMMRegister dst, Register src) {
1975   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1976   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, true);
1977   emit_int8(0x6E);
1978   emit_int8((unsigned char)(0xC0 | encode));
1979 }
1980 
1981 void Assembler::movdl(Register dst, XMMRegister src) {
1982   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1983   // swap src/dst to get correct prefix
1984   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, true);
1985   emit_int8(0x7E);
1986   emit_int8((unsigned char)(0xC0 | encode));
1987 }
1988 
1989 void Assembler::movdl(XMMRegister dst, Address src) {
1990   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1991   if (VM_Version::supports_evex()) {
1992     tuple_type = EVEX_T1S;
1993     input_size_in_bits = EVEX_32bit;
1994   }
1995   InstructionMark im(this);
1996   simd_prefix(dst, src, VEX_SIMD_66, true, VEX_OPCODE_0F);
1997   emit_int8(0x6E);
1998   emit_operand(dst, src);
1999 }
2000 
2001 void Assembler::movdl(Address dst, XMMRegister src) {
2002   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2003   if (VM_Version::supports_evex()) {
2004     tuple_type = EVEX_T1S;
2005     input_size_in_bits = EVEX_32bit;
2006   }
2007   InstructionMark im(this);
2008   simd_prefix(dst, src, VEX_SIMD_66, true);
2009   emit_int8(0x7E);
2010   emit_operand(src, dst);
2011 }
2012 
2013 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2014   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2015   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2016 }
2017 
2018 void Assembler::movdqa(XMMRegister dst, Address src) {
2019   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2020   if (VM_Version::supports_evex()) {
2021     tuple_type = EVEX_FVM;
2022   }
2023   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2024 }
2025 
2026 void Assembler::movdqu(XMMRegister dst, Address src) {
2027   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2028   if (VM_Version::supports_evex()) {
2029     tuple_type = EVEX_FVM;
2030   }
2031   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2032 }
2033 
2034 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2035   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2036   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2037 }
2038 
2039 void Assembler::movdqu(Address dst, XMMRegister src) {
2040   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2041   if (VM_Version::supports_evex()) {
2042     tuple_type = EVEX_FVM;
2043   }
2044   InstructionMark im(this);
2045   simd_prefix(dst, src, VEX_SIMD_F3, false);
2046   emit_int8(0x7F);
2047   emit_operand(src, dst);
2048 }
2049 
2050 // Move Unaligned 256bit Vector
2051 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2052   assert(UseAVX > 0, "");
2053   if (VM_Version::supports_evex()) {
2054     tuple_type = EVEX_FVM;
2055   }
2056   int vector_len = AVX_256bit;
2057   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2058   emit_int8(0x6F);
2059   emit_int8((unsigned char)(0xC0 | encode));
2060 }
2061 
2062 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2063   assert(UseAVX > 0, "");
2064   if (VM_Version::supports_evex()) {
2065     tuple_type = EVEX_FVM;
2066   }
2067   InstructionMark im(this);
2068   int vector_len = AVX_256bit;
2069   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
2070   emit_int8(0x6F);
2071   emit_operand(dst, src);
2072 }
2073 
2074 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2075   assert(UseAVX > 0, "");
2076   if (VM_Version::supports_evex()) {
2077     tuple_type = EVEX_FVM;
2078   }
2079   InstructionMark im(this);
2080   int vector_len = AVX_256bit;
2081   // swap src<->dst for encoding
2082   assert(src != xnoreg, "sanity");
2083   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
2084   emit_int8(0x7F);
2085   emit_operand(src, dst);
2086 }
2087 
2088 // Move Unaligned EVEX-enabled Vector (programmable element size: 8, 16, 32, or 64 bits)
2089 void Assembler::evmovdqu(XMMRegister dst, XMMRegister src, int vector_len) {
2090   assert(UseAVX > 0, "");
2091   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2092   emit_int8(0x6F);
2093   emit_int8((unsigned char)(0xC0 | encode));
2094 }
2095 
2096 void Assembler::evmovdqu(XMMRegister dst, Address src, int vector_len) {
2097   assert(UseAVX > 0, "");
2098   if (VM_Version::supports_evex()) {
2099     tuple_type = EVEX_FVM;
2100   }
2101   InstructionMark im(this);
2102   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
2103   emit_int8(0x6F);
2104   emit_operand(dst, src);
2105 }
2106 
2107 void Assembler::evmovdqu(Address dst, XMMRegister src, int vector_len) {
2108   assert(UseAVX > 0, "");
2109   if (VM_Version::supports_evex()) {
2110     tuple_type = EVEX_FVM;
2111   }
2112   InstructionMark im(this);

2113   // swap src<->dst for encoding
2114   assert(src != xnoreg, "sanity");
2115   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
2116   emit_int8(0x7F);
2117   emit_operand(src, dst);
2118 }
2119 
2120 // Uses zero extension on 64bit
2121 
2122 void Assembler::movl(Register dst, int32_t imm32) {
2123   int encode = prefix_and_encode(dst->encoding());
2124   emit_int8((unsigned char)(0xB8 | encode));
2125   emit_int32(imm32);
2126 }
2127 
2128 void Assembler::movl(Register dst, Register src) {
2129   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2130   emit_int8((unsigned char)0x8B);
2131   emit_int8((unsigned char)(0xC0 | encode));
2132 }
2133 
2134 void Assembler::movl(Register dst, Address src) {
2135   InstructionMark im(this);


2141 void Assembler::movl(Address dst, int32_t imm32) {
2142   InstructionMark im(this);
2143   prefix(dst);
2144   emit_int8((unsigned char)0xC7);
2145   emit_operand(rax, dst, 4);
2146   emit_int32(imm32);
2147 }
2148 
2149 void Assembler::movl(Address dst, Register src) {
2150   InstructionMark im(this);
2151   prefix(dst, src);
2152   emit_int8((unsigned char)0x89);
2153   emit_operand(src, dst);
2154 }
2155 
2156 // Newer CPUs require the use of movsd and movss to avoid a partial register stall
2157 // when loading from memory. But for old Opterons, use movlpd instead of movsd.
2158 // The selection is done in MacroAssembler::movdbl() and movflt().
2159 void Assembler::movlpd(XMMRegister dst, Address src) {
2160   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2161   if (VM_Version::supports_evex()) {
2162     tuple_type = EVEX_T1S;
2163     input_size_in_bits = EVEX_32bit;
2164   }
2165   emit_simd_arith(0x12, dst, src, VEX_SIMD_66, true);
2166 }
2167 
2168 void Assembler::movq( MMXRegister dst, Address src ) {
2169   assert( VM_Version::supports_mmx(), "" );
2170   emit_int8(0x0F);
2171   emit_int8(0x6F);
2172   emit_operand(dst, src);
2173 }
2174 
2175 void Assembler::movq( Address dst, MMXRegister src ) {
2176   assert( VM_Version::supports_mmx(), "" );
2177   emit_int8(0x0F);
2178   emit_int8(0x7F);
2179   // workaround gcc (3.2.1-7a) bug
2180   // In that version of gcc, with only an emit_operand(MMX, Address)
2181   // available, gcc will tail jump and try to reverse the parameters,
2182   // completely obliterating dst in the process. By having a version
2183   // available that doesn't need to swap the args at the tail jump, the
2184   // bug is avoided.
2185   emit_operand(dst, src);
2186 }
2187 
2188 void Assembler::movq(XMMRegister dst, Address src) {
2189   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2190   InstructionMark im(this);
2191   if (VM_Version::supports_evex()) {
2192     tuple_type = EVEX_T1S;
2193     input_size_in_bits = EVEX_64bit;
2194     simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, true);
2195   } else {
2196     simd_prefix(dst, src, VEX_SIMD_F3, true, VEX_OPCODE_0F);
2197   }
2198   emit_int8(0x7E);
2199   emit_operand(dst, src);
2200 }
2201 
2202 void Assembler::movq(Address dst, XMMRegister src) {
2203   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2204   InstructionMark im(this);
2205   if (VM_Version::supports_evex()) {
2206     tuple_type = EVEX_T1S;
2207     input_size_in_bits = EVEX_64bit;
2208     simd_prefix(src, xnoreg, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, true, AVX_128bit);
2209   } else {
2210     simd_prefix(dst, src, VEX_SIMD_66, true);
2211   }
2212   emit_int8((unsigned char)0xD6);
2213   emit_operand(src, dst);
2214 }
2215 
2216 void Assembler::movsbl(Register dst, Address src) { // movsxb
2217   InstructionMark im(this);
2218   prefix(src, dst);
2219   emit_int8(0x0F);
2220   emit_int8((unsigned char)0xBE);
2221   emit_operand(dst, src);
2222 }
2223 
2224 void Assembler::movsbl(Register dst, Register src) { // movsxb
2225   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2226   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
2227   emit_int8(0x0F);
2228   emit_int8((unsigned char)0xBE);
2229   emit_int8((unsigned char)(0xC0 | encode));
2230 }
2231 
2232 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2233   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2234   if (VM_Version::supports_evex()) {
2235     emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, true);
2236   } else {
2237     emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
2238   }
2239 }
2240 
2241 void Assembler::movsd(XMMRegister dst, Address src) {
2242   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2243   if (VM_Version::supports_evex()) {
2244     tuple_type = EVEX_T1S;
2245     input_size_in_bits = EVEX_64bit;
2246     emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, true);
2247   } else {
2248     emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
2249   }
2250 }
2251 
2252 void Assembler::movsd(Address dst, XMMRegister src) {
2253   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2254   InstructionMark im(this);
2255   if (VM_Version::supports_evex()) {
2256     tuple_type = EVEX_T1S;
2257     input_size_in_bits = EVEX_64bit;
2258     simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
2259   } else {
2260     simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, false);
2261   }
2262   emit_int8(0x11);
2263   emit_operand(src, dst);
2264 }
2265 
2266 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2267   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2268   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, true);
2269 }
2270 
2271 void Assembler::movss(XMMRegister dst, Address src) {
2272   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2273   if (VM_Version::supports_evex()) {
2274     tuple_type = EVEX_T1S;
2275     input_size_in_bits = EVEX_32bit;
2276   }
2277   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, true);
2278 }
2279 
2280 void Assembler::movss(Address dst, XMMRegister src) {
2281   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2282   if (VM_Version::supports_evex()) {
2283     tuple_type = EVEX_T1S;
2284     input_size_in_bits = EVEX_32bit;
2285   }
2286   InstructionMark im(this);
2287   simd_prefix(dst, src, VEX_SIMD_F3, false);
2288   emit_int8(0x11);
2289   emit_operand(src, dst);
2290 }
2291 
2292 void Assembler::movswl(Register dst, Address src) { // movsxw
2293   InstructionMark im(this);
2294   prefix(src, dst);
2295   emit_int8(0x0F);
2296   emit_int8((unsigned char)0xBF);
2297   emit_operand(dst, src);
2298 }
2299 
2300 void Assembler::movswl(Register dst, Register src) { // movsxw
2301   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2302   emit_int8(0x0F);
2303   emit_int8((unsigned char)0xBF);
2304   emit_int8((unsigned char)(0xC0 | encode));
2305 }
2306 
2307 void Assembler::movw(Address dst, int imm16) {


2359   emit_int8(0x0F);
2360   emit_int8((unsigned char)0xB7);
2361   emit_int8(0xC0 | encode);
2362 }
2363 
2364 void Assembler::mull(Address src) {
2365   InstructionMark im(this);
2366   prefix(src);
2367   emit_int8((unsigned char)0xF7);
2368   emit_operand(rsp, src);
2369 }
2370 
2371 void Assembler::mull(Register src) {
2372   int encode = prefix_and_encode(src->encoding());
2373   emit_int8((unsigned char)0xF7);
2374   emit_int8((unsigned char)(0xE0 | encode));
2375 }
2376 
2377 void Assembler::mulsd(XMMRegister dst, Address src) {
2378   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2379   if (VM_Version::supports_evex()) {
2380     tuple_type = EVEX_T1S;
2381     input_size_in_bits = EVEX_64bit;
2382     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2383   } else {
2384     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2385   }
2386 }
2387 
2388 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2389   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2390   if (VM_Version::supports_evex()) {
2391     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2392   } else {
2393     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2394   }
2395 }
2396 
2397 void Assembler::mulss(XMMRegister dst, Address src) {
2398   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2399   if (VM_Version::supports_evex()) {
2400     tuple_type = EVEX_T1S;
2401     input_size_in_bits = EVEX_32bit;
2402   }
2403   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2404 }
2405 
2406 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2407   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2408   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2409 }
2410 
2411 void Assembler::negl(Register dst) {
2412   int encode = prefix_and_encode(dst->encoding());
2413   emit_int8((unsigned char)0xF7);
2414   emit_int8((unsigned char)(0xD8 | encode));
2415 }
2416 
2417 void Assembler::nop(int i) {
2418 #ifdef ASSERT
2419   assert(i > 0, " ");
2420   // The fancy nops aren't currently recognized by debuggers, making it a
2421   // pain to disassemble code while debugging. If asserts are on, speed is
2422   // clearly not an issue, so simply use the single-byte traditional nop


2682 void Assembler::orl(Register dst, int32_t imm32) {
2683   prefix(dst);
2684   emit_arith(0x81, 0xC8, dst, imm32);
2685 }
2686 
2687 void Assembler::orl(Register dst, Address src) {
2688   InstructionMark im(this);
2689   prefix(src, dst);
2690   emit_int8(0x0B);
2691   emit_operand(dst, src);
2692 }
2693 
2694 void Assembler::orl(Register dst, Register src) {
2695   (void) prefix_and_encode(dst->encoding(), src->encoding());
2696   emit_arith(0x0B, 0xC0, dst, src);
2697 }
2698 
2699 void Assembler::packuswb(XMMRegister dst, Address src) {
2700   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2701   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2702   if (VM_Version::supports_evex()) {
2703     tuple_type = EVEX_FV;
2704     input_size_in_bits = EVEX_32bit;
2705   }
2706   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2707 }
2708 
2709 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2710   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2711   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2712 }
2713 
2714 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
2715   assert(UseAVX > 0, "some form of AVX must be enabled");
2716   emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len);
2717 }
2718 
2719 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
2720   assert(VM_Version::supports_avx2(), "");
2721   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_3A, true, vector_len);
2722   emit_int8(0x00);
2723   emit_int8(0xC0 | encode);
2724   emit_int8(imm8);
2725 }
2726 
2727 void Assembler::pause() {
2728   emit_int8((unsigned char)0xF3);
2729   emit_int8((unsigned char)0x90);
2730 }
2731 
2732 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2733   assert(VM_Version::supports_sse4_2(), "");
2734   InstructionMark im(this);
2735   simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_3A);
2736   emit_int8(0x61);
2737   emit_operand(dst, src);
2738   emit_int8(imm8);
2739 }
2740 
2741 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2742   assert(VM_Version::supports_sse4_2(), "");
2743   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_3A);
2744   emit_int8(0x61);
2745   emit_int8((unsigned char)(0xC0 | encode));
2746   emit_int8(imm8);
2747 }
2748 
2749 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
2750   assert(VM_Version::supports_sse4_1(), "");
2751   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A);
2752   emit_int8(0x16);
2753   emit_int8((unsigned char)(0xC0 | encode));
2754   emit_int8(imm8);
2755 }
2756 
2757 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
2758   assert(VM_Version::supports_sse4_1(), "");
2759   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A);
2760   emit_int8(0x16);
2761   emit_int8((unsigned char)(0xC0 | encode));
2762   emit_int8(imm8);
2763 }
2764 
2765 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
2766   assert(VM_Version::supports_sse4_1(), "");
2767   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A);
2768   emit_int8(0x22);
2769   emit_int8((unsigned char)(0xC0 | encode));
2770   emit_int8(imm8);
2771 }
2772 
2773 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
2774   assert(VM_Version::supports_sse4_1(), "");
2775   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A);
2776   emit_int8(0x22);
2777   emit_int8((unsigned char)(0xC0 | encode));
2778   emit_int8(imm8);
2779 }
2780 
2781 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
2782   assert(VM_Version::supports_sse4_1(), "");
2783   if (VM_Version::supports_evex()) {
2784     tuple_type = EVEX_HVM;
2785   }
2786   InstructionMark im(this);
2787   simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
2788   emit_int8(0x30);
2789   emit_operand(dst, src);
2790 }
2791 
2792 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2793   assert(VM_Version::supports_sse4_1(), "");
2794   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
2795   emit_int8(0x30);
2796   emit_int8((unsigned char)(0xC0 | encode));
2797 }
2798 
2799 // generic
2800 void Assembler::pop(Register dst) {
2801   int encode = prefix_and_encode(dst->encoding());
2802   emit_int8(0x58 | encode);
2803 }
2804 
2805 void Assembler::popcntl(Register dst, Address src) {
2806   assert(VM_Version::supports_popcnt(), "must support");
2807   InstructionMark im(this);
2808   emit_int8((unsigned char)0xF3);
2809   prefix(src, dst);
2810   emit_int8(0x0F);
2811   emit_int8((unsigned char)0xB8);
2812   emit_operand(dst, src);
2813 }
2814 


2877   InstructionMark im(this);
2878   prefetch_prefix(src);
2879   emit_int8(0x18);
2880   emit_operand(rbx, src); // 3, src
2881 }
2882 
2883 void Assembler::prefetchw(Address src) {
2884   assert(VM_Version::supports_3dnow_prefetch(), "must support");
2885   InstructionMark im(this);
2886   prefetch_prefix(src);
2887   emit_int8(0x0D);
2888   emit_operand(rcx, src); // 1, src
2889 }
2890 
2891 void Assembler::prefix(Prefix p) {
2892   emit_int8(p);
2893 }
2894 
2895 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
2896   assert(VM_Version::supports_ssse3(), "");
2897   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
2898   emit_int8(0x00);
2899   emit_int8((unsigned char)(0xC0 | encode));
2900 }
2901 
2902 void Assembler::pshufb(XMMRegister dst, Address src) {
2903   assert(VM_Version::supports_ssse3(), "");
2904   if (VM_Version::supports_evex()) {
2905     tuple_type = EVEX_FVM;
2906   }
2907   InstructionMark im(this);
2908   simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
2909   emit_int8(0x00);
2910   emit_operand(dst, src);
2911 }
2912 
2913 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2914   assert(isByte(mode), "invalid value");
2915   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2916   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
2917   emit_int8(mode & 0xFF);
2918 
2919 }
2920 
2921 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2922   assert(isByte(mode), "invalid value");
2923   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2924   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2925   if (VM_Version::supports_evex()) {
2926     tuple_type = EVEX_FV;
2927     input_size_in_bits = EVEX_32bit;
2928   }
2929   InstructionMark im(this);
2930   simd_prefix(dst, src, VEX_SIMD_66, false);
2931   emit_int8(0x70);
2932   emit_operand(dst, src);
2933   emit_int8(mode & 0xFF);
2934 }
2935 
2936 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2937   assert(isByte(mode), "invalid value");
2938   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2939   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
2940   emit_int8(mode & 0xFF);
2941 }
2942 
2943 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2944   assert(isByte(mode), "invalid value");
2945   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2946   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2947   if (VM_Version::supports_evex()) {
2948     tuple_type = EVEX_FVM;
2949   }
2950   InstructionMark im(this);
2951   simd_prefix(dst, src, VEX_SIMD_F2, false);
2952   emit_int8(0x70);
2953   emit_operand(dst, src);
2954   emit_int8(mode & 0xFF);
2955 }
2956 
2957 void Assembler::psrldq(XMMRegister dst, int shift) {
2958   // Shift 128 bit value in xmm register by number of bytes.
2959   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2960   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true);
2961   emit_int8(0x73);
2962   emit_int8((unsigned char)(0xC0 | encode));
2963   emit_int8(shift);
2964 }
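     // Annotation: xmm3 is not a real operand above; its encoding (3) supplies the
     // /3 opcode-extension field of the ModRM byte that selects psrldq within the
     // 0x66 0x0F 0x73 shift group.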
2965 
2966 void Assembler::ptest(XMMRegister dst, Address src) {
2967   assert(VM_Version::supports_sse4_1(), "");
2968   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2969   InstructionMark im(this);
2970   simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
2971   emit_int8(0x17);
2972   emit_operand(dst, src);
2973 }
2974 
2975 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2976   assert(VM_Version::supports_sse4_1(), "");
2977   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
2978   emit_int8(0x17);
2979   emit_int8((unsigned char)(0xC0 | encode));
2980 }
2981 
2982 void Assembler::vptest(XMMRegister dst, Address src) {
2983   assert(VM_Version::supports_avx(), "");
2984   InstructionMark im(this);
2985   int vector_len = AVX_256bit;
2986   assert(dst != xnoreg, "sanity");
2987   int dst_enc = dst->encoding();
2988   // swap src<->dst for encoding
2989   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
2990   emit_int8(0x17);
2991   emit_operand(dst, src);
2992 }
2993 
2994 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
2995   assert(VM_Version::supports_avx(), "");
2996   int vector_len = AVX_256bit;
2997   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
2998   emit_int8(0x17);
2999   emit_int8((unsigned char)(0xC0 | encode));
3000 }
3001 
3002 void Assembler::punpcklbw(XMMRegister dst, Address src) {
3003   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3004   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3005   if (VM_Version::supports_evex()) {
3006     tuple_type = EVEX_FVM;
3007   }
3008   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
3009 }
3010 
3011 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3012   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3013   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
3014 }
3015 
3016 void Assembler::punpckldq(XMMRegister dst, Address src) {
3017   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3018   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3019   if (VM_Version::supports_evex()) {
3020     tuple_type = EVEX_FV;
3021     input_size_in_bits = EVEX_32bit;
3022   }
3023   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3024 }
3025 
3026 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
3027   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3028   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3029 }
3030 
3031 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
3032   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3033   emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
3034 }
3035 
3036 void Assembler::push(int32_t imm32) {
3037   // in 64-bit mode we push 64 bits onto the stack but only
3038   // take a 32-bit immediate
3039   emit_int8(0x68);
3040   emit_int32(imm32);
3041 }
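     // Annotation: the 0x68 encoding takes a 32-bit immediate that the CPU
     // sign-extends to 64 bits before pushing in 64-bit mode, so only values
     // representable in an int32_t can be pushed this way.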
3042 


3212   assert(isShiftCount(imm8), "illegal shift count");
3213   int encode = prefix_and_encode(dst->encoding());
3214   emit_int8((unsigned char)0xC1);
3215   emit_int8((unsigned char)(0xE8 | encode));
3216   emit_int8(imm8);
3217 }
3218 
3219 void Assembler::shrl(Register dst) {
3220   int encode = prefix_and_encode(dst->encoding());
3221   emit_int8((unsigned char)0xD3);
3222   emit_int8((unsigned char)(0xE8 | encode));
3223 }
3224 
3225 // copies a single word from [esi] to [edi]
3226 void Assembler::smovl() {
3227   emit_int8((unsigned char)0xA5);
3228 }
3229 
3230 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
3231   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3232   if (VM_Version::supports_evex()) {
3233     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3234   } else {
3235     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3236   }
3237 }
3238 
3239 void Assembler::sqrtsd(XMMRegister dst, Address src) {
3240   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3241   if (VM_Version::supports_evex()) {
3242     tuple_type = EVEX_T1S;
3243     input_size_in_bits = EVEX_64bit;
3244     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3245   } else {
3246     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3247   }
3248 }
3249 
3250 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
3251   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3252   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3253 }
3254 
3255 void Assembler::std() {
3256   emit_int8((unsigned char)0xFD);
3257 }
3258 
3259 void Assembler::sqrtss(XMMRegister dst, Address src) {
3260   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3261   if (VM_Version::supports_evex()) {
3262     tuple_type = EVEX_T1S;
3263     input_size_in_bits = EVEX_32bit;
3264   }
3265   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3266 }
3267 
3268 void Assembler::stmxcsr( Address dst) {
3269   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3270   InstructionMark im(this);
3271   prefix(dst);
3272   emit_int8(0x0F);
3273   emit_int8((unsigned char)0xAE);
3274   emit_operand(as_Register(3), dst);
3275 }
3276 
3277 void Assembler::subl(Address dst, int32_t imm32) {
3278   InstructionMark im(this);
3279   prefix(dst);
3280   emit_arith_operand(0x81, rbp, dst, imm32);
3281 }
3282 
3283 void Assembler::subl(Address dst, Register src) {
3284   InstructionMark im(this);


3295 // Force generation of a 4 byte immediate value even if it fits into 8bit
3296 void Assembler::subl_imm32(Register dst, int32_t imm32) {
3297   prefix(dst);
3298   emit_arith_imm32(0x81, 0xE8, dst, imm32);
3299 }
3300 
3301 void Assembler::subl(Register dst, Address src) {
3302   InstructionMark im(this);
3303   prefix(src, dst);
3304   emit_int8(0x2B);
3305   emit_operand(dst, src);
3306 }
3307 
3308 void Assembler::subl(Register dst, Register src) {
3309   (void) prefix_and_encode(dst->encoding(), src->encoding());
3310   emit_arith(0x2B, 0xC0, dst, src);
3311 }
3312 
3313 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
3314   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3315   if (VM_Version::supports_evex()) {
3316     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3317   } else {
3318     emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
3319   }
3320 }
3321 
3322 void Assembler::subsd(XMMRegister dst, Address src) {
3323   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3324   if (VM_Version::supports_evex()) {
3325     tuple_type = EVEX_T1S;
3326     input_size_in_bits = EVEX_64bit;
3327   }
3328   emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3329 }
3330 
3331 void Assembler::subss(XMMRegister dst, XMMRegister src) {
3332   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3333   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3334 }
3335 
3336 void Assembler::subss(XMMRegister dst, Address src) {
3337   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3338   if (VM_Version::supports_evex()) {
3339     tuple_type = EVEX_T1S;
3340     input_size_in_bits = EVEX_32bit;
3341   }
3342   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3343 }
3344 
3345 void Assembler::testb(Register dst, int imm8) {
3346   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
3347   (void) prefix_and_encode(dst->encoding(), true);
3348   emit_arith_b(0xF6, 0xC0, dst, imm8);
3349 }
3350 
3351 void Assembler::testl(Register dst, int32_t imm32) {
3352   // not using emit_arith because test
3353   // doesn't support sign-extension of
3354   // 8bit operands
3355   int encode = dst->encoding();
3356   if (encode == 0) {
3357     emit_int8((unsigned char)0xA9);
3358   } else {
3359     encode = prefix_and_encode(encode);
3360     emit_int8((unsigned char)0xF7);
3361     emit_int8((unsigned char)(0xC0 | encode));


3378 void Assembler::tzcntl(Register dst, Register src) {
3379   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3380   emit_int8((unsigned char)0xF3);
3381   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3382   emit_int8(0x0F);
3383   emit_int8((unsigned char)0xBC);
3384   emit_int8((unsigned char)0xC0 | encode);
3385 }
3386 
3387 void Assembler::tzcntq(Register dst, Register src) {
3388   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3389   emit_int8((unsigned char)0xF3);
3390   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3391   emit_int8(0x0F);
3392   emit_int8((unsigned char)0xBC);
3393   emit_int8((unsigned char)(0xC0 | encode));
3394 }
3395 
3396 void Assembler::ucomisd(XMMRegister dst, Address src) {
3397   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3398   if (VM_Version::supports_evex()) {
3399     tuple_type = EVEX_T1S;
3400     input_size_in_bits = EVEX_64bit;
3401     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
3402   } else {
3403     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3404   }
3405 }
3406 
3407 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
3408   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3409   if (VM_Version::supports_evex()) {
3410     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
3411   } else {
3412     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3413   }
3414 }
3415 
3416 void Assembler::ucomiss(XMMRegister dst, Address src) {
3417   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3418   if (VM_Version::supports_evex()) {
3419     tuple_type = EVEX_T1S;
3420     input_size_in_bits = EVEX_32bit;
3421   }
3422   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
3423 }
3424 
3425 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
3426   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3427   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
3428 }
3429 
3430 void Assembler::xabort(int8_t imm8) {
3431   emit_int8((unsigned char)0xC6);
3432   emit_int8((unsigned char)0xF8);
3433   emit_int8((unsigned char)(imm8 & 0xFF));
3434 }
3435 
3436 void Assembler::xaddl(Address dst, Register src) {
3437   InstructionMark im(this);
3438   prefix(dst, src);
3439   emit_int8(0x0F);
3440   emit_int8((unsigned char)0xC1);
3441   emit_operand(src, dst);
3442 }
3443 
3444 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3445   InstructionMark im(this);
3446   relocate(rtype);
3447   if (abort.is_bound()) {


3489   emit_arith(0x81, 0xF0, dst, imm32);
3490 }
3491 
3492 void Assembler::xorl(Register dst, Address src) {
3493   InstructionMark im(this);
3494   prefix(src, dst);
3495   emit_int8(0x33);
3496   emit_operand(dst, src);
3497 }
3498 
3499 void Assembler::xorl(Register dst, Register src) {
3500   (void) prefix_and_encode(dst->encoding(), src->encoding());
3501   emit_arith(0x33, 0xC0, dst, src);
3502 }
3503 
3504 
3505 // AVX 3-operand scalar floating-point arithmetic instructions
3506 
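These AVX forms are non-destructive three-operand encodings: nds supplies the first source, src the second, and dst receives the result (for the scalar forms the untouched upper lanes of dst are copied from nds). A hedged usage sketch, illustrative only and not part of this file:

// xmm0 = xmm1 + xmm2 (scalar double); xmm1 is left unchanged.
// The legacy SSE form addsd(xmm0, xmm1) instead computes xmm0 = xmm0 + xmm1.
vaddsd(xmm0, xmm1, xmm2);
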
3507 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3508   assert(VM_Version::supports_avx(), "");
3509   if (VM_Version::supports_evex()) {
3510     tuple_type = EVEX_T1S;
3511     input_size_in_bits = EVEX_64bit;
3512     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3513   } else {
3514     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3515   }
3516 }
3517 
3518 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3519   assert(VM_Version::supports_avx(), "");
3520   if (VM_Version::supports_evex()) {
3521     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3522   } else {
3523     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3524   }
3525 }
3526 
3527 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3528   assert(VM_Version::supports_avx(), "");
3529   if (VM_Version::supports_evex()) {
3530     tuple_type = EVEX_T1S;
3531     input_size_in_bits = EVEX_32bit;
3532   }
3533   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector_len */ AVX_128bit);
3534 }
3535 
3536 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3537   assert(VM_Version::supports_avx(), "");
3538   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector_len */ AVX_128bit);
3539 }
3540 
3541 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3542   assert(VM_Version::supports_avx(), "");
3543   if (VM_Version::supports_evex()) {
3544     tuple_type = EVEX_T1S;
3545     input_size_in_bits = EVEX_64bit;
3546     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3547   } else {
3548     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3549   }
3550 }
3551 
3552 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3553   assert(VM_Version::supports_avx(), "");
3554   if (VM_Version::supports_evex()) {
3555     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3556   } else {
3557     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3558   }
3559 }
3560 
3561 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3562   assert(VM_Version::supports_avx(), "");
3563   if (VM_Version::supports_evex()) {
3564     tuple_type = EVEX_T1S;
3565     input_size_in_bits = EVEX_32bit;
3566   }
3567   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector_len */ AVX_128bit);
3568 }
3569 
3570 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3571   assert(VM_Version::supports_avx(), "");
3572   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector_len */ AVX_128bit);
3573 }
3574 
3575 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3576   assert(VM_Version::supports_avx(), "");
3577   if (VM_Version::supports_evex()) {
3578     tuple_type = EVEX_T1S;
3579     input_size_in_bits = EVEX_64bit;
3580     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3581   } else {
3582     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3583   }
3584 }
3585 
3586 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3587   assert(VM_Version::supports_avx(), "");
3588   if (VM_Version::supports_evex()) {
3589     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3590   } else {
3591     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3592   }
3593 }
3594 
3595 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3596   assert(VM_Version::supports_avx(), "");
3597   if (VM_Version::supports_evex()) {
3598     tuple_type = EVEX_T1S;
3599     input_size_in_bits = EVEX_32bit;
3600   }
3601   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector_len */ AVX_128bit);
3602 }
3603 
3604 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3605   assert(VM_Version::supports_avx(), "");
3606   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector_len */ AVX_128bit);
3607 }
3608 
3609 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3610   assert(VM_Version::supports_avx(), "");
3611   if (VM_Version::supports_evex()) {
3612     tuple_type = EVEX_T1S;
3613     input_size_in_bits = EVEX_64bit;
3614     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3615   } else {
3616     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3617   }
3618 }
3619 
3620 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3621   assert(VM_Version::supports_avx(), "");
3622   if (VM_Version::supports_evex()) {
3623     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3624   } else {
3625     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector_len */ AVX_128bit);
3626   }
3627 }
3628 
3629 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
3630   assert(VM_Version::supports_avx(), "");
3631   if (VM_Version::supports_evex()) {
3632     tuple_type = EVEX_T1S;
3633     input_size_in_bits = EVEX_32bit;
3634   }
3635   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector_len */ AVX_128bit);
3636 }
3637 
3638 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3639   assert(VM_Version::supports_avx(), "");
3640   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector_len */ AVX_128bit);
3641 }
3642 
3643 //====================VECTOR ARITHMETIC=====================================
3644 
3645 // Floating-point vector arithmetic
3646 
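The packed emitters below take an explicit vector_len, so one routine covers XMM, YMM and (under EVEX) ZMM widths. A hedged usage sketch, illustrative only, written as it might appear in MacroAssembler-level code:

// Four packed doubles at a 256-bit vector length: ymm0 = ymm1 + ymm2.
// AVX_128bit or AVX_512bit would select the narrower or wider encoding.
vaddpd(xmm0, xmm1, xmm2, Assembler::AVX_256bit);
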
3647 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
3648   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3649   if (VM_Version::supports_evex()) {
3650     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
3651   } else {
3652     emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
3653   }
3654 }
3655 
3656 void Assembler::addps(XMMRegister dst, XMMRegister src) {
3657   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3658   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
3659 }
3660 
3661 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3662   assert(VM_Version::supports_avx(), "");
3663   if (VM_Version::supports_evex()) {
3664     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3665   } else {
3666     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3667   }
3668 }
3669 
3670 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3671   assert(VM_Version::supports_avx(), "");
3672   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
3673 }
3674 
3675 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3676   assert(VM_Version::supports_avx(), "");
3677   if (VM_Version::supports_evex()) {
3678     tuple_type = EVEX_FV;
3679     input_size_in_bits = EVEX_64bit;
3680     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3681   } else {
3682     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3683   }
3684 }
3685 
3686 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3687   assert(VM_Version::supports_avx(), "");
3688   if (VM_Version::supports_evex()) {
3689     tuple_type = EVEX_FV;
3690     input_size_in_bits = EVEX_32bit;
3691   }
3692   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
3693 }
3694 
3695 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
3696   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3697   if (VM_Version::supports_evex()) {
3698     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
3699   } else {
3700     emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
3701   }
3702 }
3703 
3704 void Assembler::subps(XMMRegister dst, XMMRegister src) {
3705   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3706   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
3707 }
3708 
3709 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3710   assert(VM_Version::supports_avx(), "");
3711   if (VM_Version::supports_evex()) {
3712     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3713   } else {
3714     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3715   }
3716 }
3717 
3718 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3719   assert(VM_Version::supports_avx(), "");
3720   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
3721 }
3722 
3723 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3724   assert(VM_Version::supports_avx(), "");
3725   if (VM_Version::supports_evex()) {
3726     tuple_type = EVEX_FV;
3727     input_size_in_bits = EVEX_64bit;
3728     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3729   } else {
3730     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3731   }
3732 }
3733 
3734 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3735   assert(VM_Version::supports_avx(), "");
3736   if (VM_Version::supports_evex()) {
3737     tuple_type = EVEX_FV;
3738     input_size_in_bits = EVEX_32bit;
3739   }
3740   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
3741 }
3742 
3743 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3744   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3745   if (VM_Version::supports_evex()) {
3746     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
3747   } else {
3748     emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
3749   }
3750 }
3751 
3752 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3753   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3754   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3755 }
3756 
3757 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3758   assert(VM_Version::supports_avx(), "");
3759   if (VM_Version::supports_evex()) {
3760     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3761   } else {
3762     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3763   }
3764 }
3765 
3766 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3767   assert(VM_Version::supports_avx(), "");
3768   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
3769 }
3770 
3771 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3772   assert(VM_Version::supports_avx(), "");
3773   if (VM_Version::supports_evex()) {
3774     tuple_type = EVEX_FV;
3775     input_size_in_bits = EVEX_64bit;
3776     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3777   } else {
3778     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3779   }
3780 }
3781 
3782 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3783   assert(VM_Version::supports_avx(), "");
3784   if (VM_Version::supports_evex()) {
3785     tuple_type = EVEX_FV;
3786     input_size_in_bits = EVEX_32bit;
3787   }
3788   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
3789 }
3790 
3791 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
3792   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3793   if (VM_Version::supports_evex()) {
3794     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
3795   } else {
3796     emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
3797   }
3798 }
3799 
3800 void Assembler::divps(XMMRegister dst, XMMRegister src) {
3801   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3802   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
3803 }
3804 
3805 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3806   assert(VM_Version::supports_avx(), "");
3807   if (VM_Version::supports_evex()) {
3808     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
3809   } else {
3810     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
3811   }
3812 }
3813 
3814 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3815   assert(VM_Version::supports_avx(), "");
3816   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
3817 }
3818 
3819 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3820   assert(VM_Version::supports_avx(), "");
3821   if (VM_Version::supports_evex()) {
3822     tuple_type = EVEX_FV;
3823     input_size_in_bits = EVEX_64bit;
3824     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
3825   } else {
3826     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
3827   }
3828 }
3829 
3830 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3831   assert(VM_Version::supports_avx(), "");
3832   if (VM_Version::supports_evex()) {
3833     tuple_type = EVEX_FV;
3834     input_size_in_bits = EVEX_32bit;
3835   }
3836   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
3837 }
3838 
3839 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
3840   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3841   if (VM_Version::supports_evex()) {
3842     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
3843   } else {
3844     emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3845   }
3846 }
3847 
3848 void Assembler::andps(XMMRegister dst, XMMRegister src) {
3849   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3850   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3851 }
3852 
3853 void Assembler::andps(XMMRegister dst, Address src) {
3854   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3855   if (VM_Version::supports_evex()) {
3856     tuple_type = EVEX_FV;
3857     input_size_in_bits = EVEX_32bit;
3858   }
3859   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3860 }
3861 
3862 void Assembler::andpd(XMMRegister dst, Address src) {
3863   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3864   if (VM_Version::supports_evex()) {
3865     tuple_type = EVEX_FV;
3866     input_size_in_bits = EVEX_64bit;
3867     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
3868   } else {
3869     emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3870   }
3871 }
3872 
3873 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3874   assert(VM_Version::supports_avx(), "");
3875   if (VM_Version::supports_evex()) {
3876     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
3877   } else {
3878     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
3879   }
3880 }
3881 
3882 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3883   assert(VM_Version::supports_avx(), "");
3884   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len);
3885 }
3886 
3887 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3888   assert(VM_Version::supports_avx(), "");
3889   if (VM_Version::supports_evex()) {
3890     tuple_type = EVEX_FV;
3891     input_size_in_bits = EVEX_64bit;
3892     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
3893   } else {
3894     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
3895   }
3896 }
3897 
3898 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3899   assert(VM_Version::supports_avx(), "");
3900   if (VM_Version::supports_evex()) {
3901     tuple_type = EVEX_FV;
3902     input_size_in_bits = EVEX_32bit;
3903   }
3904   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len);
3905 }
3906 
3907 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3908   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3909   if (VM_Version::supports_evex()) {
3910     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
3911   } else {
3912     emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3913   }
3914 }
3915 
3916 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3917   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3918   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3919 }
3920 
3921 void Assembler::xorpd(XMMRegister dst, Address src) {
3922   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3923   if (VM_Version::supports_evex()) {
3924     tuple_type = EVEX_FV;
3925     input_size_in_bits = EVEX_64bit;
3926     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
3927   } else {
3928     emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3929   }
3930 }
3931 
3932 void Assembler::xorps(XMMRegister dst, Address src) {
3933   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3934   if (VM_Version::supports_evex()) {
3935     tuple_type = EVEX_FV;
3936     input_size_in_bits = EVEX_32bit;
3937   }
3938   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3939 }
3940 
3941 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3942   assert(VM_Version::supports_avx(), "");
3943   if (VM_Version::supports_evex()) {
3944     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
3945   } else {
3946     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
3947   }
3948 }
3949 
3950 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3951   assert(VM_Version::supports_avx(), "");
3952   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len);
3953 }
3954 
3955 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3956   assert(VM_Version::supports_avx(), "");
3957   if (VM_Version::supports_evex()) {
3958     tuple_type = EVEX_FV;
3959     input_size_in_bits = EVEX_64bit;
3960     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
3961   } else {
3962     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
3963   }
3964 }
3965 
3966 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3967   assert(VM_Version::supports_avx(), "");
3968   if (VM_Version::supports_evex()) {
3969     tuple_type = EVEX_FV;
3970     input_size_in_bits = EVEX_32bit;
3971   }
3972   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len);
3973 }
3974 

3975 // Integer vector arithmetic
3976 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3977   assert((VM_Version::supports_avx() && (vector_len == 0)) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3978   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, true, false);
3979   emit_int8(0x01);
3980   emit_int8((unsigned char)(0xC0 | encode));
3981 }
3982 
3983 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3984   assert((VM_Version::supports_avx() && (vector_len == 0)) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3985   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, true, false);
3986   emit_int8(0x02);
3987   emit_int8((unsigned char)(0xC0 | encode));
3988 }
3989 
3990 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
3991   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3992   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
3993 }
3994 
3995 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
3996   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3997   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
3998 }
3999 
4000 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
4001   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4002   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
4003 }
4004 
4005 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
4006   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4007   if (VM_Version::supports_evex()) {
4008     emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
4009   } else {
4010     emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
4011   }
4012 }
4013 
4014 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
4015   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4016   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
4017   emit_int8(0x01);
4018   emit_int8((unsigned char)(0xC0 | encode));
4019 }
4020 
4021 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
4022   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4023   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38, false, AVX_128bit, true);
4024   emit_int8(0x02);
4025   emit_int8((unsigned char)(0xC0 | encode));
4026 }
4027 
4028 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4029   assert(UseAVX > 0, "requires some form of AVX");
4030   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len);
4031 }
4032 
4033 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4034   assert(UseAVX > 0, "requires some form of AVX");
4035   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len);
4036 }
4037 
4038 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4039   assert(UseAVX > 0, "requires some form of AVX");
4040   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4041 }
4042 
4043 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4044   assert(UseAVX > 0, "requires some form of AVX");
4045   if (VM_Version::supports_evex()) {
4046     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4047   } else {
4048     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4049   }
4050 }
4051 
4052 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4053   assert(UseAVX > 0, "requires some form of AVX");
4054   if (VM_Version::supports_evex()) {
4055     tuple_type = EVEX_FVM;
4056   }
4057   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len);
4058 }
4059 
4060 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4061   assert(UseAVX > 0, "requires some form of AVX");
4062   if (VM_Version::supports_evex()) {
4063     tuple_type = EVEX_FVM;
4064   }
4065   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len);
4066 }
4067 
4068 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4069   assert(UseAVX > 0, "requires some form of AVX");
4070   if (VM_Version::supports_evex()) {
4071     tuple_type = EVEX_FV;
4072     input_size_in_bits = EVEX_32bit;
4073   }
4074   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4075 }
4076 
4077 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4078   assert(UseAVX > 0, "requires some form of AVX");
4079   if (VM_Version::supports_evex()) {
4080     tuple_type = EVEX_FV;
4081     input_size_in_bits = EVEX_64bit;
4082     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4083   } else {
4084     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4085   }
4086 }
4087 
4088 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
4089   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4090   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
4091 }
4092 
4093 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
4094   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4095   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
4096 }
4097 
4098 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
4099   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4100   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
4101 }
4102 
4103 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
4104   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4105   if (VM_Version::supports_evex()) {
4106     emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
4107   } else {
4108     emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
4109   }
4110 }
4111 
4112 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4113   assert(UseAVX > 0, "requires some form of AVX");
4114   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len);
4115 }
4116 
4117 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4118   assert(UseAVX > 0, "requires some form of AVX");
4119   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len);
4120 }
4121 
4122 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4123   assert(UseAVX > 0, "requires some form of AVX");
4124   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4125 }
4126 
4127 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4128   assert(UseAVX > 0, "requires some form of AVX");
4129   if (VM_Version::supports_evex()) {
4130     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4131   } else {
4132     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4133   }
4134 }
4135 
4136 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4137   assert(UseAVX > 0, "requires some form of AVX");
4138   if (VM_Version::supports_evex()) {
4139     tuple_type = EVEX_FVM;
4140   }
4141   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len);
4142 }
4143 
4144 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4145   assert(UseAVX > 0, "requires some form of AVX");
4146   if (VM_Version::supports_evex()) {
4147     tuple_type = EVEX_FVM;
4148   }
4149   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len);
4150 }
4151 
4152 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4153   assert(UseAVX > 0, "requires some form of AVX");
4154   if (VM_Version::supports_evex()) {
4155     tuple_type = EVEX_FV;
4156     input_size_in_bits = EVEX_32bit;
4157   }
4158   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4159 }
4160 
4161 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4162   assert(UseAVX > 0, "requires some form of AVX");
4163   if (VM_Version::supports_evex()) {
4164     tuple_type = EVEX_FV;
4165     input_size_in_bits = EVEX_64bit;
4166     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4167   } else {
4168     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4169   }
4170 }
4171 
4172 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
4173   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4174   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
4175 }
4176 
4177 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
4178   assert(VM_Version::supports_sse4_1(), "");
4179   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
4180   emit_int8(0x40);
4181   emit_int8((unsigned char)(0xC0 | encode));
4182 }
4183 
4184 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4185   assert(UseAVX > 0, "requires some form of AVX");
4186   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len);
4187 }
4188 
4189 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4190   assert(UseAVX > 0, "requires some form of AVX");
4191   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
4192   emit_int8(0x40);
4193   emit_int8((unsigned char)(0xC0 | encode));
4194 }
4195 
4196 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4197   assert(UseAVX > 2, "requires AVX-512");
4198   int src_enc = src->encoding();
4199   int dst_enc = dst->encoding();
4200   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4201   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len, false, false);
4202   emit_int8(0x40);
4203   emit_int8((unsigned char)(0xC0 | encode));
4204 }
4205 
4206 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4207   assert(UseAVX > 0, "requires some form of AVX");
4208   if (VM_Version::supports_evex()) {
4209     tuple_type = EVEX_FVM;
4210   }
4211   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len);
4212 }
4213 
4214 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4215   assert(UseAVX > 0, "requires some form of AVX");
4216   if (VM_Version::supports_evex()) {
4217     tuple_type = EVEX_FV;
4218     input_size_in_bits = EVEX_32bit;
4219   }
4220   InstructionMark im(this);
4221   int dst_enc = dst->encoding();
4222   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4223   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
4224   emit_int8(0x40);
4225   emit_operand(dst, src);
4226 }
4227 
4228 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4229   assert(UseAVX > 0, "requires some form of AVX");
4230   if (VM_Version::supports_evex()) {
4231     tuple_type = EVEX_FV;
4232     input_size_in_bits = EVEX_64bit;
4233   }
4234   InstructionMark im(this);
4235   int dst_enc = dst->encoding();
4236   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4237   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len);
4238   emit_int8(0x40);
4239   emit_operand(dst, src);
4240 }
4241 
4242 // Shift packed integers left by specified number of bits.
4243 void Assembler::psllw(XMMRegister dst, int shift) {
4244   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4245   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4246   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false);
4247   emit_int8(0x71);
4248   emit_int8((unsigned char)(0xC0 | encode));
4249   emit_int8(shift & 0xFF);
4250 }
4251 
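In these immediate-shift forms the first XMM argument handed to simd_prefix_and_encode is not a real operand; it supplies the reg field of the ModRM byte, i.e. the /6 (or /2, /4) opcode extension named in the comments. A worked byte-level example, hedged as illustration only:

// psllw xmm1, imm8 is 66 0F 71 /6 ib. With dst == xmm1 the returned encode is 1,
// so the ModRM byte works out as:
int modrm = 0xC0 | (6 << 3) | 1;   // == 0xF1: mod=11, reg=/6, rm=xmm1
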
4252 void Assembler::pslld(XMMRegister dst, int shift) {
4253   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4254   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4255   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false);
4256   emit_int8(0x72);
4257   emit_int8((unsigned char)(0xC0 | encode));
4258   emit_int8(shift & 0xFF);
4259 }
4260 
4261 void Assembler::psllq(XMMRegister dst, int shift) {
4262   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4263   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4264   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
4265   emit_int8(0x73);
4266   emit_int8((unsigned char)(0xC0 | encode));
4267   emit_int8(shift & 0xFF);
4268 }
4269 
4270 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
4271   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4272   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
4273 }
4274 
4275 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
4276   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4277   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
4278 }
4279 
4280 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
4281   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4282   if (VM_Version::supports_evex()) {
4283     emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
4284   } else {
4285     emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
4286   }
4287 }
4288 
4289 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4290   assert(UseAVX > 0, "requires some form of AVX");
4291   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4292   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len);
4293   emit_int8(shift & 0xFF);
4294 }
4295 
4296 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4297   assert(UseAVX > 0, "requires some form of AVX");
4298   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4299   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
4300   emit_int8(shift & 0xFF);
4301 }
4302 
4303 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4304   assert(UseAVX > 0, "requires some form of AVX");
4305   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4306   if (VM_Version::supports_evex()) {
4307     emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4308   } else {
4309     emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4310   }
4311   emit_int8(shift & 0xFF);
4312 }
4313 
4314 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4315   assert(UseAVX > 0, "requires some form of AVX");
4316   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len);
4317 }
4318 
4319 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4320   assert(UseAVX > 0, "requires some form of AVX");
4321   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
4322 }
4323 
4324 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4325   assert(UseAVX > 0, "requires some form of AVX");
4326   if (VM_Version::supports_evex()) {
4327     emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4328   } else {
4329     emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4330   }
4331 }
4332 
4333 // Shift packed integers logically right by specified number of bits.
4334 void Assembler::psrlw(XMMRegister dst, int shift) {
4335   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4336   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4337   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false);
4338   emit_int8(0x71);
4339   emit_int8((unsigned char)(0xC0 | encode));
4340   emit_int8(shift & 0xFF);
4341 }
4342 
4343 void Assembler::psrld(XMMRegister dst, int shift) {
4344   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4345   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4346   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false);
4347   emit_int8(0x72);
4348   emit_int8((unsigned char)(0xC0 | encode));
4349   emit_int8(shift & 0xFF);
4350 }
4351 
4352 void Assembler::psrlq(XMMRegister dst, int shift) {
4353   // Do not confuse it with psrldq SSE2 instruction which
4354   // shifts 128 bit value in xmm register by number of bytes.
4355   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4356   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4357   int encode = 0;
4358   if (VM_Version::supports_evex()) {
4359     encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, true);
4360   } else {
4361     encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
4362   }
4363   emit_int8(0x73);
4364   emit_int8((unsigned char)(0xC0 | encode));
4365   emit_int8(shift & 0xFF);
4366 }
4367 
4368 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
4369   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4370   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
4371 }
4372 
4373 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
4374   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4375   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
4376 }
4377 
4378 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
4379   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4380   if (VM_Version::supports_evex()) {
4381     emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
4382   } else {
4383     emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
4384   }
4385 }
4386 
4387 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4388   assert(UseAVX > 0, "requires some form of AVX");
4389   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4390   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len);
4391   emit_int8(shift & 0xFF);
4392 }
4393 
4394 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4395   assert(UseAVX > 0, "requires some form of AVX");
4396   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4397   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
4398   emit_int8(shift & 0xFF);
4399 }
4400 
4401 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4402   assert(UseAVX > 0, "requires some form of AVX");
4403   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4404   if (VM_Version::supports_evex()) {
4405     emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4406   } else {
4407     emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4408   }
4409   emit_int8(shift & 0xFF);
4410 }
4411 
4412 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4413   assert(UseAVX > 0, "requires some form of AVX");
4414   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len);
4415 }
4416 
4417 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4418   assert(UseAVX > 0, "requires some form of AVX");
4419   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
4420 }
4421 
4422 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4423   assert(UseAVX > 0, "requires some form of AVX");
4424   if (VM_Version::supports_evex()) {
4425     emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4426   } else {
4427     emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4428   }
4429 }
4430 
4431 // Shift packed integers arithmetically right by specified number of bits.
4432 void Assembler::psraw(XMMRegister dst, int shift) {
4433   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4434   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4435   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false);
4436   emit_int8(0x71);
4437   emit_int8((unsigned char)(0xC0 | encode));
4438   emit_int8(shift & 0xFF);
4439 }
4440 
4441 void Assembler::psrad(XMMRegister dst, int shift) {
4442   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4443   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4444   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false);
4445   emit_int8(0x72);
4446   emit_int8((unsigned char)(0xC0 | encode));
4447   emit_int8(shift & 0xFF);
4448 }
4449 
4450 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
4451   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4452   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
4453 }
4454 
4455 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
4456   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4457   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
4458 }
4459 
4460 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4461   assert(UseAVX > 0, "requires some form of AVX");
4462   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4463   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len);
4464   emit_int8(shift & 0xFF);
4465 }
4466 
4467 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4468   assert(UseAVX > 0, "requires some form of AVX");
4469   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4470   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
4471   emit_int8(shift & 0xFF);
4472 }
4473 
4474 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4475   assert(UseAVX > 0, "requires some form of AVX");
4476   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len);
4477 }
4478 
4479 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4480   assert(UseAVX > 0, "requires some form of AVX");
4481   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
4482 }
4483 
4484 
4485 // AND packed integers
4486 void Assembler::pand(XMMRegister dst, XMMRegister src) {
4487   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4488   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
4489 }
4490 
4491 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4492   assert(UseAVX > 0, "requires some form of AVX");
4493   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4494 }
4495 
4496 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4497   assert(UseAVX > 0, "requires some form of AVX");
4498   if (VM_Version::supports_evex()) {
4499     tuple_type = EVEX_FV;
4500     input_size_in_bits = EVEX_32bit;
4501   }
4502   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4503 }
4504 
4505 void Assembler::por(XMMRegister dst, XMMRegister src) {
4506   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4507   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
4508 }
4509 
4510 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4511   assert(UseAVX > 0, "requires some form of AVX");
4512   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
4513 }
4514 
4515 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4516   assert(UseAVX > 0, "requires some form of AVX");
4517   if (VM_Version::supports_evex()) {
4518     tuple_type = EVEX_FV;
4519     input_size_in_bits = EVEX_32bit;
4520   }
4521   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
4522 }
4523 
4524 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
4525   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4526   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
4527 }
4528 
4529 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4530   assert(UseAVX > 0, "requires some form of AVX");
4531   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
4532 }
4533 
4534 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4535   assert(UseAVX > 0, "requires some form of AVX");
4536   if (VM_Version::supports_evex()) {
4537     tuple_type = EVEX_FV;
4538     input_size_in_bits = EVEX_32bit;
4539   }
4540   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
4541 }
4542 
4543 
4544 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4545   assert(VM_Version::supports_avx(), "");
4546   int vector_len = AVX_256bit;
4547   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4548   emit_int8(0x18);
4549   emit_int8((unsigned char)(0xC0 | encode));
4550   // 0x00 - insert into lower 128 bits
4551   // 0x01 - insert into upper 128 bits
4552   emit_int8(0x01);
4553 }
4554 
4555 void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4556   assert(VM_Version::supports_evex(), "");
4557   int vector_len = AVX_512bit;
4558   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4559   emit_int8(0x18);
4560   emit_int8((unsigned char)(0xC0 | encode));
4561   // 0x00 - insert into lower 256 bits
4562   // 0x01 - insert into upper 256 bits
4563   emit_int8(0x01);
4564 }
4565 
4566 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
4567   assert(VM_Version::supports_avx(), "");
4568   if (VM_Version::supports_evex()) {
4569     tuple_type = EVEX_T4;
4570     input_size_in_bits = EVEX_32bit;
4571   }
4572   InstructionMark im(this);
4573   int vector_len = AVX_256bit;
4574   assert(dst != xnoreg, "sanity");
4575   int dst_enc = dst->encoding();
4576   // swap src<->dst for encoding
4577   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4578   emit_int8(0x18);
4579   emit_operand(dst, src);
4580   // 0x01 - insert into upper 128 bits
4581   emit_int8(0x01);
4582 }
4583 
4584 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
4585   assert(VM_Version::supports_avx(), "");
4586   int vector_len = AVX_256bit;
4587   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4588   emit_int8(0x19);
4589   emit_int8((unsigned char)(0xC0 | encode));
4590   // 0x00 - extract from lower 128 bits
4591   // 0x01 - extract from upper 128 bits
4592   emit_int8(0x01);
4593 }
4594 
4595 void Assembler::vextractf128h(Address dst, XMMRegister src) {
4596   assert(VM_Version::supports_avx(), "");
4597   if (VM_Version::supports_evex()) {
4598     tuple_type = EVEX_T4;
4599     input_size_in_bits = EVEX_32bit;
4600   }
4601   InstructionMark im(this);
4602   int vector_len = AVX_256bit;
4603   assert(src != xnoreg, "sanity");
4604   int src_enc = src->encoding();
4605   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4606   emit_int8(0x19);
4607   emit_operand(src, dst);
4608   // 0x01 - extract from upper 128 bits
4609   emit_int8(0x01);
4610 }
4611 
4612 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4613   assert(VM_Version::supports_avx2(), "");
4614   int vector_len = AVX_256bit;
4615   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4616   emit_int8(0x38);
4617   emit_int8((unsigned char)(0xC0 | encode));
4618   // 0x00 - insert into lower 128 bits
4619   // 0x01 - insert into upper 128 bits
4620   emit_int8(0x01);
4621 }
4622 
4623 void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4624   assert(VM_Version::supports_evex(), "");
4625   int vector_len = AVX_512bit;
4626   int src_enc = src->encoding();
4627   int dst_enc = dst->encoding();
4628   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4629   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len, false, false);
4630   emit_int8(0x38);
4631   emit_int8((unsigned char)(0xC0 | encode));
4632   // 0x00 - insert into lower 256 bits
4633   // 0x01 - insert into upper 256 bits
4634   emit_int8(0x01);
4635 }
4636 
4637 void Assembler::vinserti128h(XMMRegister dst, Address src) {
4638   assert(VM_Version::supports_avx2(), "");
4639   if (VM_Version::supports_evex()) {
4640     tuple_type = EVEX_T4;
4641     input_size_in_bits = EVEX_32bit;
4642   }
4643   InstructionMark im(this);
4644   int vector_len = AVX_256bit;
4645   assert(dst != xnoreg, "sanity");
4646   int dst_enc = dst->encoding();
4647   // swap src<->dst for encoding
4648   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4649   emit_int8(0x38);
4650   emit_operand(dst, src);
4651   // 0x01 - insert into upper 128 bits
4652   emit_int8(0x01);
4653 }
4654 
4655 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
4656   assert(VM_Version::supports_avx(), "");
4657   int vector_len = AVX_256bit;
4658   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4659   emit_int8(0x39);
4660   emit_int8((unsigned char)(0xC0 | encode));
4661   // 0x00 - extract from lower 128 bits
4662   // 0x01 - extract from upper 128 bits
4663   emit_int8(0x01);
4664 }
4665 
4666 void Assembler::vextracti128h(Address dst, XMMRegister src) {
4667   assert(VM_Version::supports_avx2(), "");
4668   if (VM_Version::supports_evex()) {
4669     tuple_type = EVEX_T4;
4670     input_size_in_bits = EVEX_32bit;
4671   }
4672   InstructionMark im(this);
4673   int vector_len = AVX_256bit;
4674   assert(src != xnoreg, "sanity");
4675   int src_enc = src->encoding();
4676   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4677   emit_int8(0x39);
4678   emit_operand(src, dst);
4679   // 0x01 - extract from upper 128 bits
4680   emit_int8(0x01);
4681 }
4682 
4683 void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
4684   assert(VM_Version::supports_evex(), "");
4685   int vector_len = AVX_512bit;
4686   int src_enc = src->encoding();
4687   int dst_enc = dst->encoding();
4688   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len, false, false);
4689   emit_int8(0x3B);
4690   emit_int8((unsigned char)(0xC0 | encode));
4691   // 0x01 - extract from upper 256 bits
4692   emit_int8(0x01);
4693 }
4694 
4695 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
4696   assert(VM_Version::supports_evex(), "");
4697   int vector_len = AVX_512bit;
4698   int src_enc = src->encoding();
4699   int dst_enc = dst->encoding();
4700   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len, false, false);
4701   emit_int8(0x39);
4702   emit_int8((unsigned char)(0xC0 | encode));
4703   // 0x01 - extract from bits 255:128
4704   // 0x02 - extract from bits 383:256
4705   // 0x03 - extract from bits 511:384
4706   emit_int8(value & 0x3);
4707 }
4708 
4709 void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
4710   assert(VM_Version::supports_evex(), "");
4711   int vector_len = AVX_512bit;
4712   int src_enc = src->encoding();
4713   int dst_enc = dst->encoding();
4714   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len, false, false);
4715   emit_int8(0x1B);
4716   emit_int8((unsigned char)(0xC0 | encode));
4717   // 0x01 - extract from upper 256 bits
4718   emit_int8(0x01);
4719 }
4720 
4721 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
4722   assert(VM_Version::supports_evex(), "");
4723   int vector_len = AVX_512bit;
4724   int src_enc = src->encoding();
4725   int dst_enc = dst->encoding();
4726   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len, false, false);
4727   emit_int8(0x19);
4728   emit_int8((unsigned char)(0xC0 | encode));
4729   // 0x01 - extract from bits 255:128
4730   // 0x02 - extract from bits 383:256
4731   // 0x03 - extract from bits 511:384
4732   emit_int8(value & 0x3);
4733 }
4734 
4735 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
4736   assert(VM_Version::supports_evex(), "");
4737   int vector_len = AVX_512bit;
4738   int src_enc = src->encoding();
4739   int dst_enc = dst->encoding();
4740   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len, false, false);
4741   emit_int8(0x19);
4742   emit_int8((unsigned char)(0xC0 | encode));
4743   // 0x01 - extract from bits 255:128
4744   // 0x02 - extract from bits 383:256
4745   // 0x03 - extract from bits 511:384
4746   emit_int8(value & 0x3);
4747 }
4748 
4749 // duplicate 4-byte integer data from src into 8 locations in dest
4750 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
4751   assert(VM_Version::supports_avx2(), "");
4752   int vector_len = AVX_256bit;
4753   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, false);
4754   emit_int8(0x58);
4755   emit_int8((unsigned char)(0xC0 | encode));
4756 }
4757 
4758 // duplicate 4-byte integer data from src across dest (width set by vector_len)
4759 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
4760   assert(VM_Version::supports_evex(), "");
4761   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, false);
4762   emit_int8(0x58);
4763   emit_int8((unsigned char)(0xC0 | encode));
4764 }
4765 
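A hedged usage sketch for the broadcasts above, illustrative only: vpbroadcastd replicates the low 32-bit lane of the source into every 32-bit lane of the (here fixed 256-bit) destination, while evpbroadcastd does the same for whichever vector_len is passed.

// If xmm1 holds {x, ?, ?, ?}, ymm0 afterwards holds x in all eight 32-bit lanes.
vpbroadcastd(xmm0, xmm1);
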
4766 // Carry-Less Multiplication Quadword
4767 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
4768   assert(VM_Version::supports_clmul(), "");
4769   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
4770                                       VEX_OPCODE_0F_3A, false, AVX_128bit, true);
4771   emit_int8(0x44);
4772   emit_int8((unsigned char)(0xC0 | encode));
4773   emit_int8((unsigned char)mask);
4774 }
4775 
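The mask immediate selects which 64-bit half of each operand enters the carry-less multiply: bit 0 picks the quadword of the first (destination) operand and bit 4 picks the quadword of the second. A hedged sketch of the usual CRC/GHASH idiom, illustrative only:

// dst[127:0] = carry-less product of the selected 64-bit halves.
pclmulqdq(xmm0, xmm1, 0x00);   // low half of xmm0  * low half of xmm1
pclmulqdq(xmm0, xmm1, 0x11);   // high half of xmm0 * high half of xmm1
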
4776 // Carry-Less Multiplication Quadword
4777 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
4778   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
4779   int vector_len = AVX_128bit;
4780   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
4781                                      vector_len, VEX_OPCODE_0F_3A, true);
4782   emit_int8(0x44);
4783   emit_int8((unsigned char)(0xC0 | encode));
4784   emit_int8((unsigned char)mask);
4785 }
4786 
4787 void Assembler::vzeroupper() {
4788   assert(VM_Version::supports_avx(), "");
4789   if (UseAVX < 3)
4790   {
4791     (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
4792     emit_int8(0x77);
4793   }
4794 }
4795 
4796 
4797 #ifndef _LP64
4798 // 32bit only pieces of the assembler
4799 
4800 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
4801   // NO PREFIX AS NEVER 64BIT
4802   InstructionMark im(this);
4803   emit_int8((unsigned char)0x81);
4804   emit_int8((unsigned char)(0xF8 | src1->encoding()));
4805   emit_data(imm32, rspec, 0);
4806 }
4807 
4808 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
4809   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
4810   InstructionMark im(this);
4811   emit_int8((unsigned char)0x81);
4812   emit_operand(rdi, src1);
4813   emit_data(imm32, rspec, 0);


5290   }
5291 }
5292 
5293 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
5294   if (pre > 0) {
5295     emit_int8(simd_pre[pre]);
5296   }
5297   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
5298                           prefix_and_encode(dst_enc, src_enc);
5299   if (opc > 0) {
5300     emit_int8(0x0F);
5301     int opc2 = simd_opc[opc];
5302     if (opc2 > 0) {
5303       emit_int8(opc2);
5304     }
5305   }
5306   return encode;
5307 }
5308 
5309 
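     // VEX prefix layout, for reference (R, X, B and vvvv are stored inverted):
     //   2-byte form: C5 [R vvvv L pp]                - usable only when X, B and W are
     //                                                  clear and the opcode is in the 0F map
     //   3-byte form: C4 [R X B m-mmmm] [W vvvv L pp]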
5310 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
5311   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
5312     prefix(VEX_3bytes);
5313 
5314     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
5315     byte1 = (~byte1) & 0xE0;
5316     byte1 |= opc;
5317     emit_int8(byte1);
5318 
5319     int byte2 = ((~nds_enc) & 0xf) << 3;
5320     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
5321     emit_int8(byte2);
5322   } else {
5323     prefix(VEX_2bytes);
5324 
5325     int byte1 = vex_r ? VEX_R : 0;
5326     byte1 = (~byte1) & 0x80;
5327     byte1 |= ((~nds_enc) & 0xf) << 3;
5328     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
5329     emit_int8(byte1);
5330   }
5331 }
5332 
5333 // This is a 4 byte encoding
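     // EVEX layout: 62 [R X B R' 0 0 m m] [W vvvv 1 pp] [z L'L b V' aaa]
     // (R, X, B, R', vvvv and V' are stored inverted; aaa names the opmask register)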
5334 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
5335                             int nds_enc, VexSimdPrefix pre, VexOpcode opc,
5336                             bool is_extended_context, bool is_merge_context,
5337                             int vector_len, bool no_mask_reg) {
5338   // EVEX 0x62 prefix
5339   prefix(EVEX_4bytes);
5340   evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
5341 
5342   // P0: byte 2 laid out as RXBR`00mm (the R, X, B and R` bits are stored
5343   // inverted, which the (~byte2) below takes care of)
5344   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
5345   byte2 = (~byte2) & 0xF0;
5346   // confine opc opcode extensions in mm bits to lower two bits
5347   // of form {0F, 0F_38, 0F_3A}
5348   byte2 |= opc;
5349   emit_int8(byte2);
5350 
5351   // P1: byte 3 as Wvvvv1pp
5352   int byte3 = ((~nds_enc) & 0xf) << 3;
5353   // p[10] is always 1
5354   byte3 |= EVEX_F;
5355   byte3 |= (vex_w & 1) << 7;
5356   // confine pre opcode extensions in pp bits to lower two bits
5357   // of form {66, F3, F2}
5358   byte3 |= pre;
5359   emit_int8(byte3);
5360 
5361   // P2: byte 4 as zL'Lbv'aaa
5362   int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
5363   // EVEX.v` for extending EVEX.vvvv or VIDX
5364   byte4 |= (evex_v ? 0: EVEX_V);
5365   // third EVEX.b for broadcast actions
5366   byte4 |= (is_extended_context ? EVEX_Rb : 0);
5367   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
5368   byte4 |= ((vector_len) & 0x3) << 5;
5369   // last is EVEX.z for zero/merge actions
5370   byte4 |= (is_merge_context ? EVEX_Z : 0);
5371   emit_int8(byte4);
5372 }
5373 
5374 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
5375                            VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
5376   bool vex_r = (xreg_enc >= 8);
5377   bool vex_b = adr.base_needs_rex();
5378   bool vex_x = adr.index_needs_rex();
5379   avx_vector_len = vector_len;
5380   if ((UseAVX > 2) && (legacy_mode == false))
5381   {
5382     bool evex_r = false;
5383     bool evex_v = false;
5384     if (vex_b == false && vex_x == false)
5385     {
5386       // can use vex_x as bank extender on rm encoding
5387       vex_x = (xreg_enc >= 16);
5388     } else {
5389       evex_v = (nds_enc >= 16);
5390       evex_r = (xreg_enc >= 16);
5391     }
5392     is_evex_instruction = true;
5393     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
5394   } else {
5395     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
5396   }
5397 }
5398 
5399 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
5400                                      bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
5401   bool vex_r = (dst_enc >= 8);
5402   bool vex_b = (src_enc >= 8);
5403   bool vex_x = false;
5404   avx_vector_len = vector_len;
5405   if ((UseAVX > 2) && (legacy_mode == false))
5406   {
5407     bool evex_r = (dst_enc >= 16);
5408     bool evex_v = (nds_enc >= 16);
5409     // can use vex_x as bank extender on rm encoding
5410     vex_x = (src_enc >= 16);
5411     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
5412   } else {
5413     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
5414   }
5415 
5416   // return modrm byte components for operands
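       // (callers OR this with 0xC0 to produce the register-direct ModRM byte)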
5417   return (((dst_enc & 7) << 3) | (src_enc & 7));
5418 }
5419 
5420 
5421 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
5422                             bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
5423   if (UseAVX > 0) {
5424     int xreg_enc = xreg->encoding();
5425     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
5426     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
5427   } else {
5428     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
5429     rex_prefix(adr, xreg, pre, opc, rex_w);
5430   }
5431 }
5432 
5433 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
5434                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
5435   int dst_enc = dst->encoding();
5436   int src_enc = src->encoding();
5437   if (UseAVX > 0) {
5438     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5439     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
5440   } else {
5441     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
5442     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
5443   }
5444 }
5445 
5446 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
5447                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
5448   int dst_enc = dst->encoding();
5449   int src_enc = src->encoding();
5450   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5451   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
5452 }
5453 
5454 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
5455                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
5456   int dst_enc = dst->encoding();
5457   int src_enc = src->encoding();
5458   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5459   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
5460 }
5461 
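     // The emit_simd_arith helpers emit the legacy destructive two-operand form: dst is
     // also the first source, hence the (dst, dst, src) prefix calls; the _q variants set
     // the W bit for a 64-bit operand size.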
5462 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
5463   InstructionMark im(this);
5464   simd_prefix(dst, dst, src, pre, no_mask_reg);
5465   emit_int8(opcode);
5466   emit_operand(dst, src);
5467 }
5468 
5469 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
5470   InstructionMark im(this);
5471   simd_prefix_q(dst, dst, src, pre, no_mask_reg);
5472   emit_int8(opcode);
5473   emit_operand(dst, src);
5474 }
5475 
5476 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
5477   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit);
5478   emit_int8(opcode);
5479   emit_int8((unsigned char)(0xC0 | encode));
5480 }
5481 
5482 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
5483   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
5484   emit_int8(opcode);
5485   emit_int8((unsigned char)(0xC0 | encode));
5486 }
5487 
5488 // Versions with no second source register (non-destructive source).
5489 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
5490   InstructionMark im(this);
5491   simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
5492   emit_int8(opcode);
5493   emit_operand(dst, src);
5494 }
5495 
5496 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
5497   InstructionMark im(this);
5498   simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
5499   emit_int8(opcode);
5500   emit_operand(dst, src);
5501 }
5502 
5503 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
5504   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit);
5505   emit_int8(opcode);
5506   emit_int8((unsigned char)(0xC0 | encode));
5507 }
5508 
5509 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
5510   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
5511   emit_int8(opcode);
5512   emit_int8((unsigned char)(0xC0 | encode));
5513 }
5514 
5515 // 3-operand AVX instructions
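     // nds is the non-destructive first source, carried in VEX.vvvv, so dst need not
     // alias either source.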
5516 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
5517                                Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
5518   InstructionMark im(this);
5519   vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg);
5520   emit_int8(opcode);
5521   emit_operand(dst, src);
5522 }
5523 
5524 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
5525                                  Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
5526   InstructionMark im(this);
5527   vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
5528   emit_int8(opcode);
5529   emit_operand(dst, src);
5530 }
5531 
5532 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
5533                                VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
5534   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, false, no_mask_reg);
5535   emit_int8(opcode);
5536   emit_int8((unsigned char)(0xC0 | encode));
5537 }
5538 
5539 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
5540                                  VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
5541   int src_enc = src->encoding();
5542   int dst_enc = dst->encoding();
5543   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5544   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
5545   emit_int8(opcode);
5546   emit_int8((unsigned char)(0xC0 | encode));
5547 }
5548 
5549 #ifndef _LP64
5550 
5551 void Assembler::incl(Register dst) {
5552   // Don't use it directly. Use MacroAssembler::incrementl() instead.
5553   emit_int8(0x40 | dst->encoding());
5554 }
5555 
5556 void Assembler::lea(Register dst, Address src) {
5557   leal(dst, src);
5558 }
5559 
5560 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
5561   InstructionMark im(this);
5562   emit_int8((unsigned char)0xC7);
5563   emit_operand(rax, dst);
5564   emit_data((int)imm32, rspec, 0);


6022 void Assembler::andq(Register dst, Address src) {
6023   InstructionMark im(this);
6024   prefixq(src, dst);
6025   emit_int8(0x23);
6026   emit_operand(dst, src);
6027 }
6028 
6029 void Assembler::andq(Register dst, Register src) {
6030   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6031   emit_arith(0x23, 0xC0, dst, src);
6032 }
6033 
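     // BMI1 ANDN: dst = ~src1 & src2 (VEX.NDS-encoded, opcode 0xF2 in the 0F 38 map).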
6034 void Assembler::andnq(Register dst, Register src1, Register src2) {
6035   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6036   int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
6037   emit_int8((unsigned char)0xF2);
6038   emit_int8((unsigned char)(0xC0 | encode));
6039 }
6040 
6041 void Assembler::andnq(Register dst, Register src1, Address src2) {
6042   if (VM_Version::supports_evex()) {
6043     tuple_type = EVEX_T1S;
6044     input_size_in_bits = EVEX_64bit;
6045   }
6046   InstructionMark im(this);
6047   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6048   vex_prefix_0F38_q(dst, src1, src2);
6049   emit_int8((unsigned char)0xF2);
6050   emit_operand(dst, src2);
6051 }
6052 
6053 void Assembler::bsfq(Register dst, Register src) {
6054   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6055   emit_int8(0x0F);
6056   emit_int8((unsigned char)0xBC);
6057   emit_int8((unsigned char)(0xC0 | encode));
6058 }
6059 
6060 void Assembler::bsrq(Register dst, Register src) {
6061   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6062   emit_int8(0x0F);
6063   emit_int8((unsigned char)0xBD);
6064   emit_int8((unsigned char)(0xC0 | encode));
6065 }


6167   emit_arith(0x3B, 0xC0, dst, src);
6168 }
6169 
6170 void Assembler::cmpq(Register dst, Address  src) {
6171   InstructionMark im(this);
6172   prefixq(src, dst);
6173   emit_int8(0x3B);
6174   emit_operand(dst, src);
6175 }
6176 
6177 void Assembler::cmpxchgq(Register reg, Address adr) {
6178   InstructionMark im(this);
6179   prefixq(adr, reg);
6180   emit_int8(0x0F);
6181   emit_int8((unsigned char)0xB1);
6182   emit_operand(reg, adr);
6183 }
6184 
6185 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
6186   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6187   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
6188   emit_int8(0x2A);
6189   emit_int8((unsigned char)(0xC0 | encode));
6190 }
6191 
6192 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
6193   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6194   if (VM_Version::supports_evex()) {
6195     tuple_type = EVEX_T1S;
6196     input_size_in_bits = EVEX_64bit;  // qword integer memory operand
6197   }
6198   InstructionMark im(this);
6199   simd_prefix_q(dst, dst, src, VEX_SIMD_F2, true);
6200   emit_int8(0x2A);
6201   emit_operand(dst, src);
6202 }
6203 
6204 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
6205   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6206   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true);
6207   emit_int8(0x2A);
6208   emit_int8((unsigned char)(0xC0 | encode));
6209 }
6210 
6211 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
6212   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6213   if (VM_Version::supports_evex()) {
6214     tuple_type = EVEX_T1S;
6215     input_size_in_bits = EVEX_64bit;  // qword integer memory operand
6216   }
6217   InstructionMark im(this);
6218   simd_prefix_q(dst, dst, src, VEX_SIMD_F3, true);
6219   emit_int8(0x2A);
6220   emit_operand(dst, src);
6221 }
6222 
6223 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
6224   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6225   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
6226   emit_int8(0x2C);
6227   emit_int8((unsigned char)(0xC0 | encode));
6228 }
6229 
6230 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
6231   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6232   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
6233   emit_int8(0x2C);
6234   emit_int8((unsigned char)(0xC0 | encode));
6235 }
6236 
6237 void Assembler::decl(Register dst) {
6238   // Don't use it directly. Use MacroAssembler::decrementl() instead.
6239   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
6240   int encode = prefix_and_encode(dst->encoding());
6241   emit_int8((unsigned char)0xFF);
6242   emit_int8((unsigned char)(0xC8 | encode));
6243 }
6244 
6245 void Assembler::decq(Register dst) {
6246   // Don't use it directly. Use MacroAssembler::decrementq() instead.
6247   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
6248   int encode = prefixq_and_encode(dst->encoding());
6249   emit_int8((unsigned char)0xFF);
6250   emit_int8((unsigned char)(0xC8 | encode));
6251 }
6252 


6381 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
6382   InstructionMark im(this);
6383   prefix(src1);
6384   emit_int8((unsigned char)0x81);
6385   emit_operand(rdi, src1, 4); // 0x81 /7 is CMP
6386   emit_data((int)imm32, rspec, narrow_oop_operand);
6387 }
6388 
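     // LZCNT is the F3-prefixed BSR encoding; on CPUs without LZCNT the prefix is ignored
     // and the instruction executes as BSR, which is what the assert message refers to.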
6389 void Assembler::lzcntq(Register dst, Register src) {
6390   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
6391   emit_int8((unsigned char)0xF3);
6392   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6393   emit_int8(0x0F);
6394   emit_int8((unsigned char)0xBD);
6395   emit_int8((unsigned char)(0xC0 | encode));
6396 }
6397 
6398 void Assembler::movdq(XMMRegister dst, Register src) {
6399   // table D-1 says MMX/SSE2
6400   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6401   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, true);
6402   emit_int8(0x6E);
6403   emit_int8((unsigned char)(0xC0 | encode));
6404 }
6405 
6406 void Assembler::movdq(Register dst, XMMRegister src) {
6407   // table D-1 says MMX/SSE2
6408   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6409   // swap src/dst to get correct prefix
6410   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, true);
6411   emit_int8(0x7E);
6412   emit_int8((unsigned char)(0xC0 | encode));
6413 }
6414 
6415 void Assembler::movq(Register dst, Register src) {
6416   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6417   emit_int8((unsigned char)0x8B);
6418   emit_int8((unsigned char)(0xC0 | encode));
6419 }
6420 
6421 void Assembler::movq(Register dst, Address src) {
6422   InstructionMark im(this);
6423   prefixq(src, dst);
6424   emit_int8((unsigned char)0x8B);
6425   emit_operand(dst, src);
6426 }
6427 
6428 void Assembler::movq(Address dst, Register src) {
6429   InstructionMark im(this);
6430   prefixq(dst, src);


6523   emit_int8((unsigned char)0x0F);
6524   emit_int8((unsigned char)0xB7);
6525   emit_int8((unsigned char)(0xC0 | encode));
6526 }
6527 
6528 void Assembler::mulq(Address src) {
6529   InstructionMark im(this);
6530   prefixq(src);
6531   emit_int8((unsigned char)0xF7);
6532   emit_operand(rsp, src);
6533 }
6534 
6535 void Assembler::mulq(Register src) {
6536   int encode = prefixq_and_encode(src->encoding());
6537   emit_int8((unsigned char)0xF7);
6538   emit_int8((unsigned char)(0xE0 | encode));
6539 }
6540 
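     // BMI2 MULX: unsigned rdx * src, with dst1 (ModRM.reg) receiving the high 64 bits and
     // dst2 (VEX.vvvv) the low 64 bits; the flags are left untouched.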
6541 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
6542   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
6543   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(),
6544                                      VEX_SIMD_F2, VEX_OPCODE_0F_38, true, AVX_128bit, true, false);
6545   emit_int8((unsigned char)0xF6);
6546   emit_int8((unsigned char)(0xC0 | encode));
6547 }
6548 
6549 void Assembler::negq(Register dst) {
6550   int encode = prefixq_and_encode(dst->encoding());
6551   emit_int8((unsigned char)0xF7);
6552   emit_int8((unsigned char)(0xD8 | encode));
6553 }
6554 
6555 void Assembler::notq(Register dst) {
6556   int encode = prefixq_and_encode(dst->encoding());
6557   emit_int8((unsigned char)0xF7);
6558   emit_int8((unsigned char)(0xD0 | encode));
6559 }
6560 
6561 void Assembler::orq(Address dst, int32_t imm32) {
6562   InstructionMark im(this);
6563   prefixq(dst);
6564   emit_int8((unsigned char)0x81);


6673     emit_int8((unsigned char)(0xD0 | encode));
6674     emit_int8(imm8);
6675   }
6676 }
6677 
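     // For the 64-bit shifts/rotates below, D1 /r is the shift-by-one form with no
     // immediate byte, while C1 /r ib carries imm8; the isShiftCount(imm8 >> 1) asserts
     // appear to widen the accepted count range to 0..63 for 64-bit operands.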
6678 void Assembler::rorq(Register dst, int imm8) {
6679   assert(isShiftCount(imm8 >> 1), "illegal shift count");
6680   int encode = prefixq_and_encode(dst->encoding());
6681   if (imm8 == 1) {
6682     emit_int8((unsigned char)0xD1);
6683     emit_int8((unsigned char)(0xC8 | encode));
6684   } else {
6685     emit_int8((unsigned char)0xC1);
6686     emit_int8((unsigned char)(0xC8 | encode));
6687     emit_int8(imm8);
6688   }
6689 }
6690 
6691 void Assembler::rorxq(Register dst, Register src, int imm8) {
6692   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
6693   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2,
6694                                      VEX_OPCODE_0F_3A, true, AVX_128bit, true, false);
6695   emit_int8((unsigned char)0xF0);
6696   emit_int8((unsigned char)(0xC0 | encode));
6697   emit_int8(imm8);
6698 }
6699 
6700 void Assembler::sarq(Register dst, int imm8) {
6701   assert(isShiftCount(imm8 >> 1), "illegal shift count");
6702   int encode = prefixq_and_encode(dst->encoding());
6703   if (imm8 == 1) {
6704     emit_int8((unsigned char)0xD1);
6705     emit_int8((unsigned char)(0xF8 | encode));
6706   } else {
6707     emit_int8((unsigned char)0xC1);
6708     emit_int8((unsigned char)(0xF8 | encode));
6709     emit_int8(imm8);
6710   }
6711 }
6712 
6713 void Assembler::sarq(Register dst) {
6714   int encode = prefixq_and_encode(dst->encoding());

