src/cpu/x86/vm/assembler_x86.cpp

  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/stubRoutines.hpp"
  39 #include "utilities/macros.hpp"
  40 #if INCLUDE_ALL_GCS
  41 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  42 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
  43 #include "gc_implementation/g1/heapRegion.hpp"
  44 #endif // INCLUDE_ALL_GCS
  45 
  46 #ifdef PRODUCT
  47 #define BLOCK_COMMENT(str) /* nothing */
  48 #define STOP(error) stop(error)
  49 #else
  50 #define BLOCK_COMMENT(str) block_comment(str)
  51 #define STOP(error) block_comment(error); stop(error)
  52 #endif
  53 
  54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  55 // Implementation of AddressLiteral
  56 
  57 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  58   _is_lval = false;
  59   _target = target;
  60   switch (rtype) {
  61   case relocInfo::oop_type:
  62   case relocInfo::metadata_type:
  63     // Oops are a special case. Normally they would be their own section
  64     // but in cases like icBuffer they are literals in the code stream that
  65     // we don't have a section for. We use none so that we get a literal address
  66     // which is always patchable.
  67     break;
  68   case relocInfo::external_word_type:
  69     _rspec = external_word_Relocation::spec(target);
  70     break;
  71   case relocInfo::internal_word_type:
  72     _rspec = internal_word_Relocation::spec(target);
  73     break;
  74   case relocInfo::opt_virtual_call_type:
  75     _rspec = opt_virtual_call_Relocation::spec();
  76     break;


 167     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 168     madr._rspec = rspec;
 169     return madr;
 170   } else {
 171     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 172     madr._rspec = rspec;
 173     return madr;
 174   }
 175 }
 176 
 177 // Implementation of Assembler
 178 
 179 int AbstractAssembler::code_fill_byte() {
 180   return (u_char)'\xF4'; // hlt
 181 }
 182 
 183 // make this go away someday
 184 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
 185   if (rtype == relocInfo::none)
 186         emit_int32(data);
 187   else  emit_data(data, Relocation::spec_simple(rtype), format);

 188 }
 189 
 190 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
 191   assert(imm_operand == 0, "default format must be immediate in this file");
 192   assert(inst_mark() != NULL, "must be inside InstructionMark");
 193   if (rspec.type() !=  relocInfo::none) {
 194     #ifdef ASSERT
 195       check_relocation(rspec, format);
 196     #endif
 197     // Do not use AbstractAssembler::relocate, which is not intended for
 198     // embedded words.  Instead, relocate to the enclosing instruction.
 199 
 200     // hack. call32 is too wide for mask so use disp32
 201     if (format == call32_operand)
 202       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 203     else
 204       code_section()->relocate(inst_mark(), rspec, format);
 205   }
 206   emit_int32(data);
 207 }
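
// Editor's sketch, not part of this file: intended usage of emit_data(). The
// InstructionMark establishes inst_mark() so the relocation refers to the start
// of the enclosing instruction rather than to the embedded word. The emitter
// name and the rel32 computation are illustrative assumptions only (a real
// emitter would also need a declaration in assembler_x86.hpp).
void Assembler::example_call_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);               // inst_mark() -> first opcode byte
  emit_int8((unsigned char)0xE8);         // call rel32
  // the displacement is relative to the end of the 5-byte instruction
  emit_data((jint)(dest - (pc() + sizeof(jint))), rspec, call32_operand);
}
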


 256   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 257   if (is8bit(imm32)) {
 258     emit_int8(op1 | 0x02); // set sign bit
 259     emit_operand(rm, adr, 1);
 260     emit_int8(imm32 & 0xFF);
 261   } else {
 262     emit_int8(op1);
 263     emit_operand(rm, adr, 4);
 264     emit_int32(imm32);
 265   }
 266 }
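
// Editor's sketch, not part of this file: the imm8 form above is legal only
// when the 32-bit immediate survives a signed 8-bit round trip, which is what
// is8bit() checks; the hardware sign-extends the byte back to 32 bits.
static bool fits_signed_byte(int imm32) {
  return imm32 == (signed char)imm32;   // e.g. 127 and -128 fit, 128 does not
}
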
 267 
 268 
 269 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 270   assert(isByte(op1) && isByte(op2), "wrong opcode");
 271   emit_int8(op1);
 272   emit_int8(op2 | encode(dst) << 3 | encode(src));
 273 }
 274 
 275 
 276 void Assembler::emit_operand(Register reg, Register base, Register index,
 277                              Address::ScaleFactor scale, int disp,
 278                              RelocationHolder const& rspec,
 279                              int rip_relative_correction) {
 280   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
 281 
 282   // Encode the registers as needed in the fields they are used in
 283 
 284   int regenc = encode(reg) << 3;
 285   int indexenc = index->is_valid() ? encode(index) << 3 : 0;
 286   int baseenc = base->is_valid() ? encode(base) : 0;
 287 
 288   if (base->is_valid()) {
 289     if (index->is_valid()) {
 290       assert(scale != Address::no_scale, "inconsistent address");
 291       // [base + index*scale + disp]
 292       if (disp == 0 && rtype == relocInfo::none  &&
 293           base != rbp LP64_ONLY(&& base != r13)) {
 294         // [base + index*scale]
 295         // [00 reg 100][ss index base]
 296         assert(index != rsp, "illegal addressing mode");
 297         emit_int8(0x04 | regenc);
 298         emit_int8(scale << 6 | indexenc | baseenc);
 299       } else if (is8bit(disp) && rtype == relocInfo::none) {
 300         // [base + index*scale + imm8]
 301         // [01 reg 100][ss index base] imm8
 302         assert(index != rsp, "illegal addressing mode");
 303         emit_int8(0x44 | regenc);
 304         emit_int8(scale << 6 | indexenc | baseenc);
 305         emit_int8(disp & 0xFF);
 306       } else {
 307         // [base + index*scale + disp32]
 308         // [10 reg 100][ss index base] disp32
 309         assert(index != rsp, "illegal addressing mode");
 310         emit_int8(0x84 | regenc);
 311         emit_int8(scale << 6 | indexenc | baseenc);
 312         emit_data(disp, rspec, disp32_operand);
 313       }
 314     } else if (base == rsp LP64_ONLY(|| base == r12)) {
 315       // [rsp + disp]
 316       if (disp == 0 && rtype == relocInfo::none) {
 317         // [rsp]
 318         // [00 reg 100][00 100 100]
 319         emit_int8(0x04 | regenc);
 320         emit_int8(0x24);
 321       } else if (is8bit(disp) && rtype == relocInfo::none) {
 322         // [rsp + imm8]
 323         // [01 reg 100][00 100 100] disp8
 324         emit_int8(0x44 | regenc);
 325         emit_int8(0x24);
 326         emit_int8(disp & 0xFF);
 327       } else {
 328         // [rsp + imm32]
 329         // [10 reg 100][00 100 100] disp32
 330         emit_int8(0x84 | regenc);
 331         emit_int8(0x24);
 332         emit_data(disp, rspec, disp32_operand);
 333       }
 334     } else {
 335       // [base + disp]
 336       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
 337       if (disp == 0 && rtype == relocInfo::none &&
 338           base != rbp LP64_ONLY(&& base != r13)) {
 339         // [base]
 340         // [00 reg base]
 341         emit_int8(0x00 | regenc | baseenc);
 342       } else if (is8bit(disp) && rtype == relocInfo::none) {
 343         // [base + disp8]
 344         // [01 reg base] disp8
 345         emit_int8(0x40 | regenc | baseenc);
 346         emit_int8(disp & 0xFF);
 347       } else {
 348         // [base + disp32]
 349         // [10 reg base] disp32
 350         emit_int8(0x80 | regenc | baseenc);
 351         emit_data(disp, rspec, disp32_operand);
 352       }
 353     }
 354   } else {
 355     if (index->is_valid()) {
 356       assert(scale != Address::no_scale, "inconsistent address");
 357       // [index*scale + disp]
 358       // [00 reg 100][ss index 101] disp32
 359       assert(index != rsp, "illegal addressing mode");
 360       emit_int8(0x04 | regenc);
 361       emit_int8(scale << 6 | indexenc | 0x05);
 362       emit_data(disp, rspec, disp32_operand);


 372       // at the start of the instruction. That needs more correction here.
 373       // intptr_t disp = target - next_ip;
 374       assert(inst_mark() != NULL, "must be inside InstructionMark");
 375       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
 376       int64_t adjusted = disp;
 377       // Do rip-rel adjustment for 64bit
 378       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
 379       assert(is_simm32(adjusted),
 380              "must be 32bit offset (RIP relative address)");
 381       emit_data((int32_t) adjusted, rspec, disp32_operand);
 382 
 383     } else {
 384       // 32bit never did this, did everything as the rip-rel/disp code above
 385       // [disp] ABSOLUTE
 386       // [00 reg 100][00 100 101] disp32
 387       emit_int8(0x04 | regenc);
 388       emit_int8(0x25);
 389       emit_data(disp, rspec, disp32_operand);
 390     }
 391   }

 392 }
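
// Editor's worked example, not part of this file: the operand bytes produced by
// the [base + index*scale + imm8] branch above for
//   movl(rax, Address(rbx, rcx, Address::times_4, 8))
// with reg = rax (enc 0), base = rbx (enc 3), index = rcx (enc 1), scale code = 2:
static const unsigned char example_sib_operand[] = {
  0x44,   // ModRM [01 reg 100]: mod=01 (disp8), reg=000 (rax), rm=100 (SIB follows)
  0x8B,   // SIB [ss index base] = [10 001 011]: scale*4, index=rcx, base=rbx
  0x08    // disp8 = 8
};
// emitted after the 0x8B opcode byte: 44 8B 08
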
 393 
 394 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 395                              Address::ScaleFactor scale, int disp,
 396                              RelocationHolder const& rspec) {
 397   emit_operand((Register)reg, base, index, scale, disp, rspec);
 398 }
 399 
 400 // Secret local extension to Assembler::WhichOperand:
 401 #define end_pc_operand (_WhichOperand_limit)
 402 
 403 address Assembler::locate_operand(address inst, WhichOperand which) {
 404   // Decode the given instruction, and return the address of
 405   // an embedded 32-bit operand word.
 406 
 407   // If "which" is disp32_operand, selects the displacement portion
 408   // of an effective address specifier.
 409   // If "which" is imm64_operand, selects the trailing immediate constant.
 410   // If "which" is call32_operand, selects the displacement of a call or jump.
 411   // Caller is responsible for ensuring that there is such an operand,
 412   // and that it is 32/64 bits wide.
 413 
 414   // If "which" is end_pc_operand, find the end of the instruction.
 415 
 416   address ip = inst;


 669     // First byte
 670     if ((0xFF & *inst) == VEX_3bytes) {
 671       ip++; // third byte
 672       is_64bit = ((VEX_W & *ip) == VEX_W);
 673     }
 674     ip++; // opcode
 675     // To find the end of instruction (which == end_pc_operand).
 676     switch (0xFF & *ip) {
 677     case 0x61: // pcmpestri r, r/a, #8
 678     case 0x70: // pshufd r, r/a, #8
 679     case 0x73: // psrldq r, #8
 680       tail_size = 1;  // the imm8
 681       break;
 682     default:
 683       break;
 684     }
 685     ip++; // skip opcode
 686     debug_only(has_disp32 = true); // has both kinds of operands!
 687     break;
 688 
 689   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 690   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 691   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 692   case 0xDD: // fld_d a; fst_d a; fstp_d a
 693   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 694   case 0xDF: // fild_d a; fistp_d a
 695   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 696   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 697   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 698     debug_only(has_disp32 = true);
 699     break;
 700 
 701   case 0xE8: // call rdisp32
 702   case 0xE9: // jmp  rdisp32
 703     if (which == end_pc_operand)  return ip + 4;
 704     assert(which == call32_operand, "call has no disp32 or imm");
 705     return ip;
 706 
 707   case 0xF0:                    // Lock
 708     assert(os::is_MP(), "only on MP");


 968   emit_int8(0x0F);
 969   emit_int8(0x1F);
 970   emit_int8((unsigned char)0x80);
 971                    // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
 972   emit_int32(0);   // 32-bits offset (4 bytes)
 973 }
 974 
 975 void Assembler::addr_nop_8() {
 976   assert(UseAddressNop, "no CPU support");
 977   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
 978   emit_int8(0x0F);
 979   emit_int8(0x1F);
 980   emit_int8((unsigned char)0x84);
 981                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
 982   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
 983   emit_int32(0);   // 32-bits offset (4 bytes)
 984 }
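
// Editor's sketch, not part of this file: the complete 8-byte sequence produced
// by addr_nop_8(), for reference when reading disassembly.
static const unsigned char addr_nop_8_bytes[] = {
  0x0F, 0x1F, 0x84,            // NOP r/m32, ModRM [10 000 100] -> SIB + disp32
  0x00,                        // SIB: [EAX + EAX*1]
  0x00, 0x00, 0x00, 0x00       // 32-bit displacement of zero
};
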
 985 
 986 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
 987   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



 988   emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);

 989 }
 990 
 991 void Assembler::addsd(XMMRegister dst, Address src) {
 992   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
 993   emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);

 994 }
 995 
 996 void Assembler::addss(XMMRegister dst, XMMRegister src) {
 997   NOT_LP64(assert(VM_Version::supports_sse(), ""));
 998   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 999 }
1000 
1001 void Assembler::addss(XMMRegister dst, Address src) {
1002   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1003   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
1004 }
1005 
1006 void Assembler::aesdec(XMMRegister dst, Address src) {
1007   assert(VM_Version::supports_aes(), "");
1008   InstructionMark im(this);
1009   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

1010   emit_int8((unsigned char)0xDE);
1011   emit_operand(dst, src);
1012 }
1013 
1014 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1015   assert(VM_Version::supports_aes(), "");
1016   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

1017   emit_int8((unsigned char)0xDE);
1018   emit_int8(0xC0 | encode);
1019 }
1020 
1021 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1022   assert(VM_Version::supports_aes(), "");
1023   InstructionMark im(this);
1024   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

1025   emit_int8((unsigned char)0xDF);
1026   emit_operand(dst, src);
1027 }
1028 
1029 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1030   assert(VM_Version::supports_aes(), "");
1031   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

1032   emit_int8((unsigned char)0xDF);
1033   emit_int8((unsigned char)(0xC0 | encode));
1034 }
1035 
1036 void Assembler::aesenc(XMMRegister dst, Address src) {
1037   assert(VM_Version::supports_aes(), "");
1038   InstructionMark im(this);
1039   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

1040   emit_int8((unsigned char)0xDC);
1041   emit_operand(dst, src);
1042 }
1043 
1044 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1045   assert(VM_Version::supports_aes(), "");
1046   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

1047   emit_int8((unsigned char)0xDC);
1048   emit_int8(0xC0 | encode);
1049 }
1050 
1051 void Assembler::aesenclast(XMMRegister dst, Address src) {
1052   assert(VM_Version::supports_aes(), "");
1053   InstructionMark im(this);
1054   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

1055   emit_int8((unsigned char)0xDD);
1056   emit_operand(dst, src);
1057 }
1058 
1059 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1060   assert(VM_Version::supports_aes(), "");
1061   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

1062   emit_int8((unsigned char)0xDD);
1063   emit_int8((unsigned char)(0xC0 | encode));
1064 }
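
// Editor's sketch, not part of this file: how the AES-NI emitters above are
// typically chained for one AES-128 block. The use of xmm0..xmm10 for the
// expanded round keys is an illustrative assumption, not a HotSpot convention.
static void emit_aes128_encrypt_block(Assembler* a, XMMRegister state) {
  a->pxor(state, as_XMMRegister(0));          // whitening with round key 0
  for (int r = 1; r <= 9; r++) {
    a->aesenc(state, as_XMMRegister(r));      // rounds 1-9
  }
  a->aesenclast(state, as_XMMRegister(10));   // final round (no MixColumns)
}
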
1065 
1066 
1067 void Assembler::andl(Address dst, int32_t imm32) {
1068   InstructionMark im(this);
1069   prefix(dst);
1070   emit_int8((unsigned char)0x81);
1071   emit_operand(rsp, dst, 4);
1072   emit_int32(imm32);
1073 }
1074 
1075 void Assembler::andl(Register dst, int32_t imm32) {
1076   prefix(dst);
1077   emit_arith(0x81, 0xE0, dst, imm32);
1078 }
1079 
1080 void Assembler::andl(Register dst, Address src) {
1081   InstructionMark im(this);
1082   prefix(src, dst);
1083   emit_int8(0x23);
1084   emit_operand(dst, src);
1085 }
1086 
1087 void Assembler::andl(Register dst, Register src) {
1088   (void) prefix_and_encode(dst->encoding(), src->encoding());
1089   emit_arith(0x23, 0xC0, dst, src);
1090 }
1091 
1092 void Assembler::andnl(Register dst, Register src1, Register src2) {
1093   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1094   int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
1095   emit_int8((unsigned char)0xF2);
1096   emit_int8((unsigned char)(0xC0 | encode));
1097 }
1098 
1099 void Assembler::andnl(Register dst, Register src1, Address src2) {
1100   InstructionMark im(this);
1101   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1102   vex_prefix_0F38(dst, src1, src2);
1103   emit_int8((unsigned char)0xF2);
1104   emit_operand(dst, src2);
1105 }
1106 
1107 void Assembler::bsfl(Register dst, Register src) {
1108   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1109   emit_int8(0x0F);
1110   emit_int8((unsigned char)0xBC);
1111   emit_int8((unsigned char)(0xC0 | encode));
1112 }
1113 
1114 void Assembler::bsrl(Register dst, Register src) {
1115   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1116   emit_int8(0x0F);
1117   emit_int8((unsigned char)0xBD);
1118   emit_int8((unsigned char)(0xC0 | encode));
1119 }
1120 
1121 void Assembler::bswapl(Register reg) { // bswap
1122   int encode = prefix_and_encode(reg->encoding());
1123   emit_int8(0x0F);
1124   emit_int8((unsigned char)(0xC8 | encode));
1125 }
1126 
1127 void Assembler::blsil(Register dst, Register src) {
1128   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1129   int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
1130   emit_int8((unsigned char)0xF3);
1131   emit_int8((unsigned char)(0xC0 | encode));
1132 }
1133 
1134 void Assembler::blsil(Register dst, Address src) {
1135   InstructionMark im(this);
1136   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1137   vex_prefix_0F38(rbx, dst, src);
1138   emit_int8((unsigned char)0xF3);
1139   emit_operand(rbx, src);
1140 }
1141 
1142 void Assembler::blsmskl(Register dst, Register src) {
1143   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1144   int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
1145   emit_int8((unsigned char)0xF3);
1146   emit_int8((unsigned char)(0xC0 | encode));
1147 }
1148 
1149 void Assembler::blsmskl(Register dst, Address src) {
1150   InstructionMark im(this);
1151   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1152   vex_prefix_0F38(rdx, dst, src);
1153   emit_int8((unsigned char)0xF3);
1154   emit_operand(rdx, src);
1155 }
1156 
1157 void Assembler::blsrl(Register dst, Register src) {
1158   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1159   int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
1160   emit_int8((unsigned char)0xF3);
1161   emit_int8((unsigned char)(0xC0 | encode));
1162 }
1163 
1164 void Assembler::blsrl(Register dst, Address src) {
1165   InstructionMark im(this);
1166   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1167   vex_prefix_0F38(rcx, dst, src);
1168   emit_int8((unsigned char)0xF3);
1169   emit_operand(rcx, src);
1170 }
1171 
1172 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1173   // suspect disp32 is always good
1174   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1175 
1176   if (L.is_bound()) {
1177     const int long_size = 5;
1178     int offs = (int)( target(L) - pc() );
1179     assert(offs <= 0, "assembler error");
1180     InstructionMark im(this);
1181     // 1110 1000 #32-bit disp
1182     emit_int8((unsigned char)0xE8);
1183     emit_data(offs - long_size, rtype, operand);
1184   } else {
1185     InstructionMark im(this);
1186     // 1110 1000 #32-bit disp
1187     L.add_patch_at(code(), locator());


1295   emit_int8(0x0F);
1296   emit_int8((unsigned char)0xB1);
1297   emit_operand(reg, adr);
1298 }
1299 
1300 // The 8-bit cmpxchg compares the value at adr with the contents of rax,
1301 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax.
1302 // The ZF is set if the compared values were equal, and cleared otherwise.
1303 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1304   InstructionMark im(this);
1305   prefix(adr, reg, true);
1306   emit_int8(0x0F);
1307   emit_int8((unsigned char)0xB0);
1308   emit_operand(reg, adr);
1309 }
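
// Editor's sketch, not part of this file: a C model of the atomic 8-bit
// compare-and-exchange described above (the real instruction performs this as
// a single locked operation; names here are illustrative).
static bool cmpxchgb_semantics(volatile unsigned char* adr,
                               unsigned char* rax_low, unsigned char reg) {
  if (*adr == *rax_low) { *adr = reg; return true; }   // ZF set
  *rax_low = *adr;                    return false;    // ZF cleared
}
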
1310 
1311 void Assembler::comisd(XMMRegister dst, Address src) {
1312   // NOTE: dbx seems to decode this as comiss even though the
1313   // 0x66 is there. Strangely ucomisd comes out correct
1314   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1315   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);

1316 }
1317 
1318 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1319   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1320   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);

1321 }
1322 
1323 void Assembler::comiss(XMMRegister dst, Address src) {
1324   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1325   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
1326 }
1327 
1328 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1329   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1330   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
1331 }
1332 
1333 void Assembler::cpuid() {
1334   emit_int8(0x0F);
1335   emit_int8((unsigned char)0xA2);
1336 }
1337 
1338 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1339   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1340   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
1341 }
1342 
1343 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1344   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1345   emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
1346 }
1347 
1348 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1349   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1350   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);

1351 }
1352 
1353 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1354   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1355   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);

1356 }
1357 
1358 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1359   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1360   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
1361   emit_int8(0x2A);
1362   emit_int8((unsigned char)(0xC0 | encode));
1363 }
1364 
1365 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1366   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1367   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);

1368 }
1369 
1370 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1371   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1372   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1373   emit_int8(0x2A);
1374   emit_int8((unsigned char)(0xC0 | encode));
1375 }
1376 
1377 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1378   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1379   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
1380 }
1381 
1382 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1383   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1384   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1385 }
1386 
1387 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1388   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1389   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1390 }
1391 
1392 
1393 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1394   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1395   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
1396   emit_int8(0x2C);
1397   emit_int8((unsigned char)(0xC0 | encode));
1398 }
1399 
1400 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1401   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1402   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
1403   emit_int8(0x2C);
1404   emit_int8((unsigned char)(0xC0 | encode));
1405 }
1406 
1407 void Assembler::decl(Address dst) {
1408   // Don't use it directly. Use MacroAssembler::decrement() instead.
1409   InstructionMark im(this);
1410   prefix(dst);
1411   emit_int8((unsigned char)0xFF);
1412   emit_operand(rcx, dst);
1413 }
1414 
1415 void Assembler::divsd(XMMRegister dst, Address src) {
1416   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1417   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);

1418 }
1419 
1420 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1421   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1422   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);

1423 }
1424 
1425 void Assembler::divss(XMMRegister dst, Address src) {
1426   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1427   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1428 }
1429 
1430 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1431   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1432   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1433 }
1434 
1435 void Assembler::emms() {
1436   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1437   emit_int8(0x0F);
1438   emit_int8(0x77);
1439 }
1440 
1441 void Assembler::hlt() {
1442   emit_int8((unsigned char)0xF4);
1443 }
1444 
1445 void Assembler::idivl(Register src) {


1658   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1659   emit_int8(0x0F);
1660   emit_int8((unsigned char)0xBD);
1661   emit_int8((unsigned char)(0xC0 | encode));
1662 }
1663 
1664 // Emit mfence instruction
1665 void Assembler::mfence() {
1666   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1667   emit_int8(0x0F);
1668   emit_int8((unsigned char)0xAE);
1669   emit_int8((unsigned char)0xF0);
1670 }
1671 
1672 void Assembler::mov(Register dst, Register src) {
1673   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1674 }
1675 
1676 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1677   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1678   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);

1679 }
1680 
1681 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1682   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1683   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
1684 }
1685 
1686 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
1687   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1688   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);

1689   emit_int8(0x16);
1690   emit_int8((unsigned char)(0xC0 | encode));
1691 }
1692 
1693 void Assembler::movb(Register dst, Address src) {
1694   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1695   InstructionMark im(this);
1696   prefix(src, dst, true);
1697   emit_int8((unsigned char)0x8A);
1698   emit_operand(dst, src);
1699 }
1700 
1701 
1702 void Assembler::movb(Address dst, int imm8) {
1703   InstructionMark im(this);
1704   prefix(dst);
1705   emit_int8((unsigned char)0xC6);
1706   emit_operand(rax, dst, 1);
1707   emit_int8(imm8);
1708 }
1709 
1710 
1711 void Assembler::movb(Address dst, Register src) {
1712   assert(src->has_byte_register(), "must have byte register");
1713   InstructionMark im(this);
1714   prefix(dst, src, true);
1715   emit_int8((unsigned char)0x88);
1716   emit_operand(src, dst);
1717 }
1718 
1719 void Assembler::movdl(XMMRegister dst, Register src) {
1720   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1721   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1722   emit_int8(0x6E);
1723   emit_int8((unsigned char)(0xC0 | encode));
1724 }
1725 
1726 void Assembler::movdl(Register dst, XMMRegister src) {
1727   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1728   // swap src/dst to get correct prefix
1729   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
1730   emit_int8(0x7E);
1731   emit_int8((unsigned char)(0xC0 | encode));
1732 }
1733 
1734 void Assembler::movdl(XMMRegister dst, Address src) {
1735   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1736   InstructionMark im(this);
1737   simd_prefix(dst, src, VEX_SIMD_66);
1738   emit_int8(0x6E);
1739   emit_operand(dst, src);
1740 }
1741 
1742 void Assembler::movdl(Address dst, XMMRegister src) {
1743   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1744   InstructionMark im(this);
1745   simd_prefix(dst, src, VEX_SIMD_66);
1746   emit_int8(0x7E);
1747   emit_operand(src, dst);
1748 }
1749 
1750 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1751   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1752   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1753 }
1754 
1755 void Assembler::movdqa(XMMRegister dst, Address src) {
1756   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1757   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1758 }
1759 
1760 void Assembler::movdqu(XMMRegister dst, Address src) {
1761   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1762   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1763 }
1764 
1765 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1766   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1767   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1768 }
1769 
1770 void Assembler::movdqu(Address dst, XMMRegister src) {
1771   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1772   InstructionMark im(this);
1773   simd_prefix(dst, src, VEX_SIMD_F3);
1774   emit_int8(0x7F);
1775   emit_operand(src, dst);
1776 }
1777 
1778 // Move Unaligned 256bit Vector
1779 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
1780   assert(UseAVX > 0, "");
1781   bool vector256 = true;
1782   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);



1783   emit_int8(0x6F);
1784   emit_int8((unsigned char)(0xC0 | encode));
1785 }
1786 
1787 void Assembler::vmovdqu(XMMRegister dst, Address src) {
1788   assert(UseAVX > 0, "");



1789   InstructionMark im(this);
1790   bool vector256 = true;
1791   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1792   emit_int8(0x6F);
1793   emit_operand(dst, src);
1794 }
1795 
1796 void Assembler::vmovdqu(Address dst, XMMRegister src) {
1797   assert(UseAVX > 0, "");



1798   InstructionMark im(this);
1799   bool vector256 = true;
1800   // swap src<->dst for encoding
1801   assert(src != xnoreg, "sanity");
1802   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
1803   emit_int8(0x7F);
1804   emit_operand(src, dst);
1805 }
1806 
1807 // Uses zero extension on 64bit
1808 
1809 void Assembler::movl(Register dst, int32_t imm32) {
1810   int encode = prefix_and_encode(dst->encoding());
1811   emit_int8((unsigned char)(0xB8 | encode));
1812   emit_int32(imm32);
1813 }
1814 
1815 void Assembler::movl(Register dst, Register src) {
1816   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1817   emit_int8((unsigned char)0x8B);
1818   emit_int8((unsigned char)(0xC0 | encode));
1819 }
1820 
1821 void Assembler::movl(Register dst, Address src) {
1822   InstructionMark im(this);


1828 void Assembler::movl(Address dst, int32_t imm32) {
1829   InstructionMark im(this);
1830   prefix(dst);
1831   emit_int8((unsigned char)0xC7);
1832   emit_operand(rax, dst, 4);
1833   emit_int32(imm32);
1834 }
1835 
1836 void Assembler::movl(Address dst, Register src) {
1837   InstructionMark im(this);
1838   prefix(dst, src);
1839   emit_int8((unsigned char)0x89);
1840   emit_operand(src, dst);
1841 }
1842 
1843 // New CPUs require the use of movsd and movss to avoid a partial register stall
1844 // when loading from memory. But for old Opteron, use movlpd instead of movsd.
1845 // The selection is done in MacroAssembler::movdbl() and movflt().
1846 void Assembler::movlpd(XMMRegister dst, Address src) {
1847   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1848   emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
1849 }
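
// Editor's sketch, not part of this file: the selection mentioned above,
// roughly as MacroAssembler::movdbl() performs it (simplified assumption).
static void movdbl_like(Assembler* a, XMMRegister dst, Address src) {
  if (UseXmmLoadAndClearUpper) {
    a->movsd(dst, src);    // newer CPUs: avoids the partial register stall
  } else {
    a->movlpd(dst, src);   // old Opteron: movlpd is preferred
  }
}
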
1850 
1851 void Assembler::movq( MMXRegister dst, Address src ) {
1852   assert( VM_Version::supports_mmx(), "" );
1853   emit_int8(0x0F);
1854   emit_int8(0x6F);
1855   emit_operand(dst, src);
1856 }
1857 
1858 void Assembler::movq( Address dst, MMXRegister src ) {
1859   assert( VM_Version::supports_mmx(), "" );
1860   emit_int8(0x0F);
1861   emit_int8(0x7F);
1862   // workaround gcc (3.2.1-7a) bug
1863   // In that version of gcc with only an emit_operand(MMX, Address)
1864   // gcc will tail jump and try to reverse the parameters, completely
1865   // obliterating dst in the process. By having a version available
1866   // that doesn't need to swap the args at the tail jump, the bug is
1867   // avoided.
1868   emit_operand(dst, src);
1869 }
1870 
1871 void Assembler::movq(XMMRegister dst, Address src) {
1872   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1873   InstructionMark im(this);
1874   simd_prefix(dst, src, VEX_SIMD_F3);
1875   emit_int8(0x7E);
1876   emit_operand(dst, src);
1877 }
1878 
1879 void Assembler::movq(Address dst, XMMRegister src) {
1880   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1881   InstructionMark im(this);
1882   simd_prefix(dst, src, VEX_SIMD_66);
1883   emit_int8((unsigned char)0xD6);
1884   emit_operand(src, dst);
1885 }
1886 
1887 void Assembler::movsbl(Register dst, Address src) { // movsxb
1888   InstructionMark im(this);
1889   prefix(src, dst);
1890   emit_int8(0x0F);
1891   emit_int8((unsigned char)0xBE);
1892   emit_operand(dst, src);
1893 }
1894 
1895 void Assembler::movsbl(Register dst, Register src) { // movsxb
1896   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1897   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1898   emit_int8(0x0F);
1899   emit_int8((unsigned char)0xBE);
1900   emit_int8((unsigned char)(0xC0 | encode));
1901 }
1902 
1903 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1904   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



1905   emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);

1906 }
1907 
1908 void Assembler::movsd(XMMRegister dst, Address src) {
1909   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1910   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);

1911 }
1912 
1913 void Assembler::movsd(Address dst, XMMRegister src) {
1914   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1915   InstructionMark im(this);
1916   simd_prefix(dst, src, VEX_SIMD_F2);
1917   emit_int8(0x11);
1918   emit_operand(src, dst);
1919 }
1920 
1921 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1922   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1923   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
1924 }
1925 
1926 void Assembler::movss(XMMRegister dst, Address src) {
1927   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1928   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
1929 }
1930 
1931 void Assembler::movss(Address dst, XMMRegister src) {
1932   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1933   InstructionMark im(this);
1934   simd_prefix(dst, src, VEX_SIMD_F3);
1935   emit_int8(0x11);
1936   emit_operand(src, dst);
1937 }
1938 
1939 void Assembler::movswl(Register dst, Address src) { // movsxw
1940   InstructionMark im(this);
1941   prefix(src, dst);
1942   emit_int8(0x0F);
1943   emit_int8((unsigned char)0xBF);
1944   emit_operand(dst, src);
1945 }
1946 
1947 void Assembler::movswl(Register dst, Register src) { // movsxw
1948   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1949   emit_int8(0x0F);
1950   emit_int8((unsigned char)0xBF);
1951   emit_int8((unsigned char)(0xC0 | encode));
1952 }
1953 
1954 void Assembler::movw(Address dst, int imm16) {


2006   emit_int8(0x0F);
2007   emit_int8((unsigned char)0xB7);
2008   emit_int8(0xC0 | encode);
2009 }
2010 
2011 void Assembler::mull(Address src) {
2012   InstructionMark im(this);
2013   prefix(src);
2014   emit_int8((unsigned char)0xF7);
2015   emit_operand(rsp, src);
2016 }
2017 
2018 void Assembler::mull(Register src) {
2019   int encode = prefix_and_encode(src->encoding());
2020   emit_int8((unsigned char)0xF7);
2021   emit_int8((unsigned char)(0xE0 | encode));
2022 }
2023 
2024 void Assembler::mulsd(XMMRegister dst, Address src) {
2025   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2026   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);

2027 }
2028 
2029 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2030   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



2031   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);

2032 }
2033 
2034 void Assembler::mulss(XMMRegister dst, Address src) {
2035   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2036   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2037 }
2038 
2039 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2040   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2041   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2042 }
2043 
2044 void Assembler::negl(Register dst) {
2045   int encode = prefix_and_encode(dst->encoding());
2046   emit_int8((unsigned char)0xF7);
2047   emit_int8((unsigned char)(0xD8 | encode));
2048 }
2049 
2050 void Assembler::nop(int i) {
2051 #ifdef ASSERT
2052   assert(i > 0, " ");
2053   // The fancy nops aren't currently recognized by debuggers, making it a
2054   // pain to disassemble code while debugging. If asserts are on, clearly
2055   // speed is not an issue, so simply use the single byte traditional nop


2315 void Assembler::orl(Register dst, int32_t imm32) {
2316   prefix(dst);
2317   emit_arith(0x81, 0xC8, dst, imm32);
2318 }
2319 
2320 void Assembler::orl(Register dst, Address src) {
2321   InstructionMark im(this);
2322   prefix(src, dst);
2323   emit_int8(0x0B);
2324   emit_operand(dst, src);
2325 }
2326 
2327 void Assembler::orl(Register dst, Register src) {
2328   (void) prefix_and_encode(dst->encoding(), src->encoding());
2329   emit_arith(0x0B, 0xC0, dst, src);
2330 }
2331 
2332 void Assembler::packuswb(XMMRegister dst, Address src) {
2333   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2334   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2335   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2336 }
2337 
2338 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2339   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2340   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);

2341 }
2342 
2343 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2344   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
2345   emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256);

2346 }
2347 
2348 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
2349   assert(VM_Version::supports_avx2(), "");
2350   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256);

2351   emit_int8(0x00);
2352   emit_int8(0xC0 | encode);
2353   emit_int8(imm8);
2354 }
2355 
2356 void Assembler::pause() {
2357   emit_int8((unsigned char)0xF3);
2358   emit_int8((unsigned char)0x90);
2359 }
2360 
2361 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2362   assert(VM_Version::supports_sse4_2(), "");
2363   InstructionMark im(this);
2364   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);

2365   emit_int8(0x61);
2366   emit_operand(dst, src);
2367   emit_int8(imm8);
2368 }
2369 
2370 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2371   assert(VM_Version::supports_sse4_2(), "");
2372   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);

2373   emit_int8(0x61);
2374   emit_int8((unsigned char)(0xC0 | encode));
2375   emit_int8(imm8);
2376 }
2377 
2378 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
2379   assert(VM_Version::supports_sse4_1(), "");
2380   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);

2381   emit_int8(0x16);
2382   emit_int8((unsigned char)(0xC0 | encode));
2383   emit_int8(imm8);
2384 }
2385 
2386 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
2387   assert(VM_Version::supports_sse4_1(), "");
2388   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);

2389   emit_int8(0x16);
2390   emit_int8((unsigned char)(0xC0 | encode));
2391   emit_int8(imm8);
2392 }
2393 
2394 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
2395   assert(VM_Version::supports_sse4_1(), "");
2396   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);

2397   emit_int8(0x22);
2398   emit_int8((unsigned char)(0xC0 | encode));
2399   emit_int8(imm8);
2400 }
2401 
2402 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
2403   assert(VM_Version::supports_sse4_1(), "");
2404   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);

2405   emit_int8(0x22);
2406   emit_int8((unsigned char)(0xC0 | encode));
2407   emit_int8(imm8);
2408 }
2409 
2410 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
2411   assert(VM_Version::supports_sse4_1(), "");



2412   InstructionMark im(this);
2413   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2414   emit_int8(0x30);
2415   emit_operand(dst, src);
2416 }
2417 
2418 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2419   assert(VM_Version::supports_sse4_1(), "");
2420   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2421   emit_int8(0x30);
2422   emit_int8((unsigned char)(0xC0 | encode));
2423 }
2424 
2425 // generic
2426 void Assembler::pop(Register dst) {
2427   int encode = prefix_and_encode(dst->encoding());
2428   emit_int8(0x58 | encode);
2429 }
2430 
2431 void Assembler::popcntl(Register dst, Address src) {
2432   assert(VM_Version::supports_popcnt(), "must support");
2433   InstructionMark im(this);
2434   emit_int8((unsigned char)0xF3);
2435   prefix(src, dst);
2436   emit_int8(0x0F);
2437   emit_int8((unsigned char)0xB8);
2438   emit_operand(dst, src);
2439 }
2440 


2503   InstructionMark im(this);
2504   prefetch_prefix(src);
2505   emit_int8(0x18);
2506   emit_operand(rbx, src); // 3, src
2507 }
2508 
2509 void Assembler::prefetchw(Address src) {
2510   assert(VM_Version::supports_3dnow_prefetch(), "must support");
2511   InstructionMark im(this);
2512   prefetch_prefix(src);
2513   emit_int8(0x0D);
2514   emit_operand(rcx, src); // 1, src
2515 }
2516 
2517 void Assembler::prefix(Prefix p) {
2518   emit_int8(p);
2519 }
2520 
2521 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
2522   assert(VM_Version::supports_ssse3(), "");
2523   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

2524   emit_int8(0x00);
2525   emit_int8((unsigned char)(0xC0 | encode));
2526 }
2527 
2528 void Assembler::pshufb(XMMRegister dst, Address src) {
2529   assert(VM_Version::supports_ssse3(), "");



2530   InstructionMark im(this);
2531   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

2532   emit_int8(0x00);
2533   emit_operand(dst, src);
2534 }
2535 
2536 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2537   assert(isByte(mode), "invalid value");
2538   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2539   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
2540   emit_int8(mode & 0xFF);
2541 
2542 }
2543 
2544 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2545   assert(isByte(mode), "invalid value");
2546   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2547   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2548   InstructionMark im(this);
2549   simd_prefix(dst, src, VEX_SIMD_66);
2550   emit_int8(0x70);
2551   emit_operand(dst, src);
2552   emit_int8(mode & 0xFF);
2553 }
2554 
2555 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2556   assert(isByte(mode), "invalid value");
2557   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2558   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);

2559   emit_int8(mode & 0xFF);
2560 }
2561 
2562 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2563   assert(isByte(mode), "invalid value");
2564   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2565   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");



2566   InstructionMark im(this);
2567   simd_prefix(dst, src, VEX_SIMD_F2);

2568   emit_int8(0x70);
2569   emit_operand(dst, src);
2570   emit_int8(mode & 0xFF);
2571 }
2572 
2573 void Assembler::psrldq(XMMRegister dst, int shift) {
2574   // Shift 128 bit value in xmm register by number of bytes.
2575   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2576   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);

2577   emit_int8(0x73);
2578   emit_int8((unsigned char)(0xC0 | encode));
2579   emit_int8(shift);
2580 }
2581 
2582 void Assembler::ptest(XMMRegister dst, Address src) {
2583   assert(VM_Version::supports_sse4_1(), "");
2584   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2585   InstructionMark im(this);
2586   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2587   emit_int8(0x17);
2588   emit_operand(dst, src);
2589 }
2590 
2591 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2592   assert(VM_Version::supports_sse4_1(), "");
2593   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

2594   emit_int8(0x17);
2595   emit_int8((unsigned char)(0xC0 | encode));
2596 }
2597 
2598 void Assembler::vptest(XMMRegister dst, Address src) {
2599   assert(VM_Version::supports_avx(), "");
2600   InstructionMark im(this);
2601   bool vector256 = true;
2602   assert(dst != xnoreg, "sanity");
2603   int dst_enc = dst->encoding();
2604   // swap src<->dst for encoding
2605   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
2606   emit_int8(0x17);
2607   emit_operand(dst, src);
2608 }
2609 
2610 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
2611   assert(VM_Version::supports_avx(), "");
2612   bool vector256 = true;
2613   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);

2614   emit_int8(0x17);
2615   emit_int8((unsigned char)(0xC0 | encode));
2616 }
2617 
2618 void Assembler::punpcklbw(XMMRegister dst, Address src) {
2619   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2620   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");



2621   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2622 }
2623 
2624 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2625   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2626   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2627 }
2628 
2629 void Assembler::punpckldq(XMMRegister dst, Address src) {
2630   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2631   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2632   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2633 }
2634 
2635 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
2636   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2637   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2638 }
2639 
2640 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
2641   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2642   emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
2643 }
2644 
2645 void Assembler::push(int32_t imm32) {
2646   // in 64bits we push 64bits onto the stack but only
2647   // take a 32bit immediate
2648   emit_int8(0x68);
2649   emit_int32(imm32);
2650 }
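
// Editor's note, not part of this file: on 64-bit the immediate is sign-extended
// into a full 64-bit stack slot, which is why only int32_t values are accepted.
//   push(0x7FFFFFFF);   // slot becomes 0x000000007FFFFFFF
//   push(-1);           // slot becomes 0xFFFFFFFFFFFFFFFF
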
2651 


2821   assert(isShiftCount(imm8), "illegal shift count");
2822   int encode = prefix_and_encode(dst->encoding());
2823   emit_int8((unsigned char)0xC1);
2824   emit_int8((unsigned char)(0xE8 | encode));
2825   emit_int8(imm8);
2826 }
2827 
2828 void Assembler::shrl(Register dst) {
2829   int encode = prefix_and_encode(dst->encoding());
2830   emit_int8((unsigned char)0xD3);
2831   emit_int8((unsigned char)(0xE8 | encode));
2832 }
2833 
2834 // copies a single word from [esi] to [edi]
2835 void Assembler::smovl() {
2836   emit_int8((unsigned char)0xA5);
2837 }
2838 
2839 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2840   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



2841   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);

2842 }
2843 
2844 void Assembler::sqrtsd(XMMRegister dst, Address src) {
2845   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2846   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);

2847 }
2848 
2849 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
2850   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2851   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2852 }
2853 
2854 void Assembler::std() {
2855   emit_int8((unsigned char)0xFD);
2856 }
2857 
2858 void Assembler::sqrtss(XMMRegister dst, Address src) {
2859   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2860   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2861 }
2862 
2863 void Assembler::stmxcsr( Address dst) {
2864   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2865   InstructionMark im(this);
2866   prefix(dst);
2867   emit_int8(0x0F);
2868   emit_int8((unsigned char)0xAE);
2869   emit_operand(as_Register(3), dst);
2870 }
2871 
2872 void Assembler::subl(Address dst, int32_t imm32) {
2873   InstructionMark im(this);
2874   prefix(dst);
2875   emit_arith_operand(0x81, rbp, dst, imm32);
2876 }
2877 
2878 void Assembler::subl(Address dst, Register src) {
2879   InstructionMark im(this);


2890 // Force generation of a 4 byte immediate value even if it fits into 8bit
2891 void Assembler::subl_imm32(Register dst, int32_t imm32) {
2892   prefix(dst);
2893   emit_arith_imm32(0x81, 0xE8, dst, imm32);
2894 }
2895 
2896 void Assembler::subl(Register dst, Address src) {
2897   InstructionMark im(this);
2898   prefix(src, dst);
2899   emit_int8(0x2B);
2900   emit_operand(dst, src);
2901 }
2902 
2903 void Assembler::subl(Register dst, Register src) {
2904   (void) prefix_and_encode(dst->encoding(), src->encoding());
2905   emit_arith(0x2B, 0xC0, dst, src);
2906 }
2907 
2908 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2909   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



2910   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);

2911 }
2912 
2913 void Assembler::subsd(XMMRegister dst, Address src) {
2914   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2915   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
2916 }
2917 
2918 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2919   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2920   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2921 }
2922 
2923 void Assembler::subss(XMMRegister dst, Address src) {
2924   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2925   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2926 }
2927 
2928 void Assembler::testb(Register dst, int imm8) {
2929   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2930   (void) prefix_and_encode(dst->encoding(), true);
2931   emit_arith_b(0xF6, 0xC0, dst, imm8);
2932 }
2933 
2934 void Assembler::testl(Register dst, int32_t imm32) {
2935   // not using emit_arith because test
2936   // doesn't support sign-extension of
2937   // 8bit operands
2938   int encode = dst->encoding();
2939   if (encode == 0) {
2940     emit_int8((unsigned char)0xA9);
2941   } else {
2942     encode = prefix_and_encode(encode);
2943     emit_int8((unsigned char)0xF7);
2944     emit_int8((unsigned char)(0xC0 | encode));


2961 void Assembler::tzcntl(Register dst, Register src) {
2962   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2963   emit_int8((unsigned char)0xF3);
2964   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2965   emit_int8(0x0F);
2966   emit_int8((unsigned char)0xBC);
2967   emit_int8((unsigned char)0xC0 | encode);
2968 }
2969 
2970 void Assembler::tzcntq(Register dst, Register src) {
2971   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2972   emit_int8((unsigned char)0xF3);
2973   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2974   emit_int8(0x0F);
2975   emit_int8((unsigned char)0xBC);
2976   emit_int8((unsigned char)(0xC0 | encode));
2977 }
2978 
2979 void Assembler::ucomisd(XMMRegister dst, Address src) {
2980   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2981   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);

2982 }
2983 
2984 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2985   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



2986   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);

2987 }
2988 
2989 void Assembler::ucomiss(XMMRegister dst, Address src) {
2990   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2991   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
2992 }
2993 
2994 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2995   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2996   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
2997 }
2998 
2999 void Assembler::xabort(int8_t imm8) {
3000   emit_int8((unsigned char)0xC6);
3001   emit_int8((unsigned char)0xF8);
3002   emit_int8((unsigned char)(imm8 & 0xFF));
3003 }
3004 
3005 void Assembler::xaddl(Address dst, Register src) {
3006   InstructionMark im(this);
3007   prefix(dst, src);
3008   emit_int8(0x0F);
3009   emit_int8((unsigned char)0xC1);
3010   emit_operand(src, dst);
3011 }
3012 
3013 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3014   InstructionMark im(this);
3015   relocate(rtype);
3016   if (abort.is_bound()) {


3058   emit_arith(0x81, 0xF0, dst, imm32);
3059 }
3060 
3061 void Assembler::xorl(Register dst, Address src) {
3062   InstructionMark im(this);
3063   prefix(src, dst);
3064   emit_int8(0x33);
3065   emit_operand(dst, src);
3066 }
3067 
3068 void Assembler::xorl(Register dst, Register src) {
3069   (void) prefix_and_encode(dst->encoding(), src->encoding());
3070   emit_arith(0x33, 0xC0, dst, src);
3071 }
3072 
3073 
3074 // AVX 3-operands scalar float-point arithmetic instructions
3075 
3076 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3077   assert(VM_Version::supports_avx(), "");
3078   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3079 }
3080 
3081 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3082   assert(VM_Version::supports_avx(), "");
3083   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3084 }
3085 
3086 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3087   assert(VM_Version::supports_avx(), "");
3088   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3089 }
3090 
3091 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3092   assert(VM_Version::supports_avx(), "");
3093   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3094 }
3095 
3096 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3097   assert(VM_Version::supports_avx(), "");
3098   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3099 }
3100 
3101 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3102   assert(VM_Version::supports_avx(), "");
3103   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3104 }
3105 
3106 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3107   assert(VM_Version::supports_avx(), "");
3108   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3109 }
3110 
3111 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3112   assert(VM_Version::supports_avx(), "");
3113   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3114 }
3115 
3116 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3117   assert(VM_Version::supports_avx(), "");
3118   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3119 }
3120 
3121 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3122   assert(VM_Version::supports_avx(), "");
3123   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3124 }
3125 
3126 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3127   assert(VM_Version::supports_avx(), "");
3128   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3129 }
3130 
3131 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3132   assert(VM_Version::supports_avx(), "");
3133   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3134 }
3135 
3136 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3137   assert(VM_Version::supports_avx(), "");
3138   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);






3139 }
3140 
3141 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3142   assert(VM_Version::supports_avx(), "");
3143   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);




3144 }
3145 
3146 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
3147   assert(VM_Version::supports_avx(), "");
3148   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);




3149 }
3150 
3151 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3152   assert(VM_Version::supports_avx(), "");
3153   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3154 }
3155 
3156 //====================VECTOR ARITHMETIC=====================================
3157 
3158 // Floating-point vector arithmetic
3159 
3160 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
3161   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3162   emit_simd_arith(0x58, dst, src, VEX_SIMD_66);

3163 }
3164 
3165 void Assembler::addps(XMMRegister dst, XMMRegister src) {
3166   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3167   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
3168 }
3169 
3170 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3171   assert(VM_Version::supports_avx(), "");
3172   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);




3173 }
3174 
3175 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3176   assert(VM_Version::supports_avx(), "");
3177   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
3178 }
3179 
3180 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3181   assert(VM_Version::supports_avx(), "");
3182   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);






3183 }
3184 
3185 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3186   assert(VM_Version::supports_avx(), "");
3187   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);




3188 }
3189 
3190 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
3191   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3192   emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);

3193 }
3194 
3195 void Assembler::subps(XMMRegister dst, XMMRegister src) {
3196   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3197   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
3198 }
3199 
3200 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3201   assert(VM_Version::supports_avx(), "");
3202   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);




3203 }
3204 
3205 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3206   assert(VM_Version::supports_avx(), "");
3207   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3208 }
3209 
3210 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3211   assert(VM_Version::supports_avx(), "");
3212   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);






3213 }
3214 
3215 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3216   assert(VM_Version::supports_avx(), "");
3217   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);




3218 }
3219 
3220 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3221   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3222   emit_simd_arith(0x59, dst, src, VEX_SIMD_66);

3223 }
3224 
3225 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3226   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3227   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3228 }
3229 
3230 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3231   assert(VM_Version::supports_avx(), "");
3232   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);




3233 }
3234 
3235 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3236   assert(VM_Version::supports_avx(), "");
3237   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3238 }
3239 
3240 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3241   assert(VM_Version::supports_avx(), "");
3242   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);






3243 }
3244 
3245 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3246   assert(VM_Version::supports_avx(), "");
3247   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);




3248 }
3249 
3250 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
3251   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3252   emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);

3253 }
3254 
3255 void Assembler::divps(XMMRegister dst, XMMRegister src) {
3256   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3257   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
3258 }
3259 
3260 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3261   assert(VM_Version::supports_avx(), "");
3262   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);




3263 }
3264 
3265 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3266   assert(VM_Version::supports_avx(), "");
3267   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3268 }
3269 
3270 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3271   assert(VM_Version::supports_avx(), "");
3272   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);






3273 }
3274 
3275 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3276   assert(VM_Version::supports_avx(), "");
3277   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);




3278 }
3279 
3280 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
3281   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3282   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);




3283 }
3284 
3285 void Assembler::andps(XMMRegister dst, XMMRegister src) {
3286   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3287   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);

3288 }
3289 
3290 void Assembler::andps(XMMRegister dst, Address src) {
3291   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3292   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);





3293 }
3294 
3295 void Assembler::andpd(XMMRegister dst, Address src) {
3296   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3297   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);






3298 }
3299 
3300 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3301   assert(VM_Version::supports_avx(), "");
3302   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);




3303 }
3304 
3305 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3306   assert(VM_Version::supports_avx(), "");
3307   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);

3308 }
3309 
3310 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3311   assert(VM_Version::supports_avx(), "");
3312   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);






3313 }
3314 
3315 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3316   assert(VM_Version::supports_avx(), "");
3317   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);





3318 }
3319 
3320 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3321   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3322   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);




3323 }
3324 
3325 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3326   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3327   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);

3328 }
3329 
3330 void Assembler::xorpd(XMMRegister dst, Address src) {
3331   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3332   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);






3333 }
3334 
3335 void Assembler::xorps(XMMRegister dst, Address src) {
3336   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3337   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);





3338 }
3339 
3340 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3341   assert(VM_Version::supports_avx(), "");
3342   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);




3343 }
3344 
3345 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3346   assert(VM_Version::supports_avx(), "");
3347   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);

3348 }
3349 
3350 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3351   assert(VM_Version::supports_avx(), "");
3352   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);






3353 }
3354 
3355 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3356   assert(VM_Version::supports_avx(), "");
3357   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);





3358 }
3359 
3360 
3361 // Integer vector arithmetic
3362 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3363   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3364   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);


3365   emit_int8(0x01);
3366   emit_int8((unsigned char)(0xC0 | encode));
3367 }
3368 
3369 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3370   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3371   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);


3372   emit_int8(0x02);
3373   emit_int8((unsigned char)(0xC0 | encode));
3374 }
3375 
3376 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
3377   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3378   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
3379 }
3380 
3381 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
3382   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3383   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
3384 }
3385 
3386 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
3387   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3388   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
3389 }
3390 
3391 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
3392   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3393   emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);

3394 }
3395 
3396 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
3397   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
3398   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

3399   emit_int8(0x01);
3400   emit_int8((unsigned char)(0xC0 | encode));
3401 }
3402 
3403 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
3404   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
3405   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

3406   emit_int8(0x02);
3407   emit_int8((unsigned char)(0xC0 | encode));
3408 }
3409 
3410 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3411   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3412   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);

3413 }
3414 
3415 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3416   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3417   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);

3418 }
3419 
3420 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3421   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3422   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3423 }
3424 
3425 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3426   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3427   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);




3428 }
3429 
3430 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3431   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3432   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);



3433 }
3434 
3435 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3436   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3437   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);



3438 }
3439 
3440 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3441   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3442   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);




3443 }
3444 
3445 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3446   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3447   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);






3448 }
3449 
3450 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
3451   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3452   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
3453 }
3454 
3455 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
3456   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3457   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
3458 }
3459 
3460 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
3461   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3462   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
3463 }
3464 
3465 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
3466   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3467   emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);

3468 }
3469 
3470 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3471   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3472   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);

3473 }
3474 
3475 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3476   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3477   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);

3478 }
3479 
3480 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3481   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3482   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3483 }
3484 
3485 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3486   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3487   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);




3488 }
3489 
3490 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3491   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3492   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);




3493 }
3494 
3495 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3496   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3497   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);




3498 }
3499 
3500 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3501   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3502   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);




3503 }
3504 
3505 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3506   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3507   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);






3508 }
3509 
3510 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
3511   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3512   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);

3513 }
3514 
3515 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
3516   assert(VM_Version::supports_sse4_1(), "");
3517   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);

3518   emit_int8(0x40);
3519   emit_int8((unsigned char)(0xC0 | encode));
3520 }
3521 
3522 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3523   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3524   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);









3525 }
3526 
3527 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3528   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3529   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);




3530   emit_int8(0x40);
3531   emit_int8((unsigned char)(0xC0 | encode));
3532 }
3533 
3534 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3535   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3536   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);



3537 }
3538 
3539 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3540   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");




3541   InstructionMark im(this);
3542   int dst_enc = dst->encoding();
3543   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
3544   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);















3545   emit_int8(0x40);
3546   emit_operand(dst, src);
3547 }
3548 
3549 // Shift packed integers left by specified number of bits.
3550 void Assembler::psllw(XMMRegister dst, int shift) {
3551   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3552   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3553   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);

3554   emit_int8(0x71);
3555   emit_int8((unsigned char)(0xC0 | encode));
3556   emit_int8(shift & 0xFF);
3557 }
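// Worked encoding example (legacy SSE path, i.e. UseAVX == 0): psllw(xmm1, 5)
// emits 66 0F 71 F1 05. The xmm6 argument above only supplies the /6 opcode
// extension in the ModRM reg field; it is not a real operand.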
3558 
3559 void Assembler::pslld(XMMRegister dst, int shift) {
3560   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3561   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3562   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3563   emit_int8(0x72);
3564   emit_int8((unsigned char)(0xC0 | encode));
3565   emit_int8(shift & 0xFF);
3566 }
3567 
3568 void Assembler::psllq(XMMRegister dst, int shift) {
3569   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3570   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3571   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3572   emit_int8(0x73);
3573   emit_int8((unsigned char)(0xC0 | encode));
3574   emit_int8(shift & 0xFF);
3575 }
3576 
3577 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
3578   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3579   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);

3580 }
3581 
3582 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
3583   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3584   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
3585 }
3586 
3587 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
3588   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3589   emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);

3590 }
3591 
3592 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3593   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3594   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3595   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);

3596   emit_int8(shift & 0xFF);
3597 }
3598 
3599 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3600   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3601   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3602   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
3603   emit_int8(shift & 0xFF);
3604 }
3605 
3606 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3607   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3608   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3609   emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);




3610   emit_int8(shift & 0xFF);
3611 }
3612 
3613 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3614   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3615   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);

3616 }
3617 
3618 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3619   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3620   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
3621 }
3622 
3623 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3624   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3625   emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);




3626 }
3627 
3628 // Shift packed integers logically right by specified number of bits.
3629 void Assembler::psrlw(XMMRegister dst, int shift) {
3630   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3631   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3632   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);

3633   emit_int8(0x71);
3634   emit_int8((unsigned char)(0xC0 | encode));
3635   emit_int8(shift & 0xFF);
3636 }
3637 
3638 void Assembler::psrld(XMMRegister dst, int shift) {
3639   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3640   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3641   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3642   emit_int8(0x72);
3643   emit_int8((unsigned char)(0xC0 | encode));
3644   emit_int8(shift & 0xFF);
3645 }
3646 
3647 void Assembler::psrlq(XMMRegister dst, int shift) {
3648   // Do not confuse it with the psrldq SSE2 instruction, which
3649   // shifts the whole 128-bit xmm register by a number of bytes.
3650   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3651   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3652   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);





3653   emit_int8(0x73);
3654   emit_int8((unsigned char)(0xC0 | encode));
3655   emit_int8(shift & 0xFF);
3656 }
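// Illustrative contrast with the note above: psrlq(xmm1, 16) shifts each of
// the two 64-bit lanes of xmm1 right by 16 bits, whereas psrldq with the same
// immediate would shift the whole 128-bit register right by 16 bytes,
// clearing it.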
3657 
3658 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
3659   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3660   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);

3661 }
3662 
3663 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
3664   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3665   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
3666 }
3667 
3668 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
3669   NOT_LP64(assert(VM_Version::supports_sse2(), ""));



3670   emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);

3671 }
3672 
3673 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3674   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3675   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3676   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);

3677   emit_int8(shift & 0xFF);
3678 }
3679 
3680 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3681   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3682   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3683   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
3684   emit_int8(shift & 0xFF);
3685 }
3686 
3687 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3688   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3689   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3690   emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);




3691   emit_int8(shift & 0xFF);
3692 }
3693 
3694 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3695   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3696   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);

3697 }
3698 
3699 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3700   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3701   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
3702 }
3703 
3704 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3705   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3706   emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);




3707 }
3708 
3709 // Shift packed integers arithmetically right by specified number of bits.
3710 void Assembler::psraw(XMMRegister dst, int shift) {
3711   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3712   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3713   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);

3714   emit_int8(0x71);
3715   emit_int8((unsigned char)(0xC0 | encode));
3716   emit_int8(shift & 0xFF);
3717 }
3718 
3719 void Assembler::psrad(XMMRegister dst, int shift) {
3720   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3721   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3722   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3723   emit_int8(0x72);
3724   emit_int8((unsigned char)(0xC0 | encode));
3725   emit_int8(shift & 0xFF);
3726 }
3727 
3728 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
3729   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3730   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);

3731 }
3732 
3733 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
3734   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3735   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
3736 }
3737 
3738 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3739   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3740   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3741   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);

3742   emit_int8(shift & 0xFF);
3743 }
3744 
3745 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3746   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3747   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3748   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
3749   emit_int8(shift & 0xFF);
3750 }
3751 
3752 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3753   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3754   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);

3755 }
3756 
3757 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3758   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3759   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
3760 }
3761 
3762 
3763 // AND packed integers
3764 void Assembler::pand(XMMRegister dst, XMMRegister src) {
3765   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3766   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
3767 }
3768 
3769 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3770   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3771   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3772 }
3773 
3774 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3775   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3776   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);




3777 }
3778 
3779 void Assembler::por(XMMRegister dst, XMMRegister src) {
3780   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3781   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
3782 }
3783 
3784 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3785   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3786   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3787 }
3788 
3789 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3790   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3791   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);




3792 }
3793 
3794 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
3795   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3796   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
3797 }
3798 
3799 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3800   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3801   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3802 }
3803 
3804 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3805   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3806   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);




3807 }
3808 
3809 
3810 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3811   assert(VM_Version::supports_avx(), "");
3812   bool vector256 = true;
3813   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3814   emit_int8(0x18);
3815   emit_int8((unsigned char)(0xC0 | encode));
3816   // 0x00 - insert into lower 128 bits
3817   // 0x01 - insert into upper 128 bits
3818   emit_int8(0x01);
3819 }
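// Illustrative semantics of the hard-wired imm8 = 0x01 above:
// vinsertf128h(ymm0, ymm1, xmm2) builds ymm0 from the low 128 bits of ymm1
// and places xmm2 in the upper 128 bits (hence the "h" suffix).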
3820 

































3821 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
3822   assert(VM_Version::supports_avx(), "");




3823   InstructionMark im(this);
3824   bool vector256 = true;
3825   assert(dst != xnoreg, "sanity");
3826   int dst_enc = dst->encoding();
3827   // swap src<->dst for encoding
3828   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3829   emit_int8(0x18);
3830   emit_operand(dst, src);
3831   // 0x01 - insert into upper 128 bits
3832   emit_int8(0x01);
3833 }
3834 
3835 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
3836   assert(VM_Version::supports_avx(), "");
3837   bool vector256 = true;
3838   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3839   emit_int8(0x19);
3840   emit_int8((unsigned char)(0xC0 | encode));
3841   // 0x00 - extract from lower 128 bits
3842   // 0x01 - extract from upper 128 bits
3843   emit_int8(0x01);
3844 }
3845 
3846 void Assembler::vextractf128h(Address dst, XMMRegister src) {
3847   assert(VM_Version::supports_avx(), "");




3848   InstructionMark im(this);
3849   bool vector256 = true;
3850   assert(src != xnoreg, "sanity");
3851   int src_enc = src->encoding();
3852   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3853   emit_int8(0x19);
3854   emit_operand(src, dst);
3855   // 0x01 - extract from upper 128 bits
3856   emit_int8(0x01);
3857 }
3858 
3859 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3860   assert(VM_Version::supports_avx2(), "");
3861   bool vector256 = true;
3862   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3863   emit_int8(0x38);
3864   emit_int8((unsigned char)(0xC0 | encode));
3865   // 0x00 - insert into lower 128 bits
3866   // 0x01 - insert into upper 128 bits
3867   emit_int8(0x01);
3868 }
3869 















3870 void Assembler::vinserti128h(XMMRegister dst, Address src) {
3871   assert(VM_Version::supports_avx2(), "");




3872   InstructionMark im(this);
3873   bool vector256 = true;
3874   assert(dst != xnoreg, "sanity");
3875   int dst_enc = dst->encoding();
3876   // swap src<->dst for encoding
3877   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3878   emit_int8(0x38);
3879   emit_operand(dst, src);
3880   // 0x01 - insert into upper 128 bits
3881   emit_int8(0x01);
3882 }
3883 











3884 void Assembler::vextracti128h(Address dst, XMMRegister src) {
3885   assert(VM_Version::supports_avx2(), "");




3886   InstructionMark im(this);
3887   bool vector256 = true;
3888   assert(src != xnoreg, "sanity");
3889   int src_enc = src->encoding();
3890   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3891   emit_int8(0x39);
3892   emit_operand(src, dst);
3893   // 0x01 - extract from upper 128 bits
3894   emit_int8(0x01);
3895 }
3896 























































































3897 // duplicate the 4-byte integer data from src into 8 locations in dest
3898 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
3899   assert(VM_Version::supports_avx2(), "");
3900   bool vector256 = true;
3901   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);










3902   emit_int8(0x58);
3903   emit_int8((unsigned char)(0xC0 | encode));
3904 }
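// Illustrative example: if the low dword of src is 0x01020304, all eight
// 32-bit lanes of the 256-bit dst end up equal to 0x01020304 (vector256 is
// hard-wired to true above).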
3905 
3906 // Carry-Less Multiplication Quadword
3907 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
3908   assert(VM_Version::supports_clmul(), "");
3909   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);

3910   emit_int8(0x44);
3911   emit_int8((unsigned char)(0xC0 | encode));
3912   emit_int8((unsigned char)mask);
3913 }
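// Illustrative mask semantics (per the instruction definition): bit 0 of the
// mask selects which quadword of dst (the first source) is used and bit 4
// selects the quadword of src, so mask 0x00 multiplies the two low quadwords
// and mask 0x11 the two high quadwords, producing a 128-bit carry-less product.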
3914 
3915 // Carry-Less Multiplication Quadword
3916 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
3917   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
3918   bool vector256 = false;
3919   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);

3920   emit_int8(0x44);
3921   emit_int8((unsigned char)(0xC0 | encode));
3922   emit_int8((unsigned char)mask);
3923 }
3924 
3925 void Assembler::vzeroupper() {
3926   assert(VM_Version::supports_avx(), "");


3927   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
3928   emit_int8(0x77);

3929 }
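// A rough sketch, assuming the header defaults for the omitted arguments
// (VEX_OPCODE_0F, vex_w and vector256 false): the call above produces the
// two-byte VEX form C5 F8 77. vzeroupper clears the upper 128 bits of all
// ymm registers, which avoids AVX/SSE transition penalties in mixed code.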
3930 
3931 
3932 #ifndef _LP64
3933 // 32bit only pieces of the assembler
3934 
3935 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3936   // NO PREFIX AS NEVER 64BIT
3937   InstructionMark im(this);
3938   emit_int8((unsigned char)0x81);
3939   emit_int8((unsigned char)(0xF8 | src1->encoding()));
3940   emit_data(imm32, rspec, 0);
3941 }
3942 
3943 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
3944   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
3945   InstructionMark im(this);
3946   emit_int8((unsigned char)0x81);
3947   emit_operand(rdi, src1);
3948   emit_data(imm32, rspec, 0);


4425   }
4426 }
4427 
4428 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4429   if (pre > 0) {
4430     emit_int8(simd_pre[pre]);
4431   }
4432   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
4433                           prefix_and_encode(dst_enc, src_enc);
4434   if (opc > 0) {
4435     emit_int8(0x0F);
4436     int opc2 = simd_opc[opc];
4437     if (opc2 > 0) {
4438       emit_int8(opc2);
4439     }
4440   }
4441   return encode;
4442 }
4443 
4444 
4445 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
4446   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
4447     prefix(VEX_3bytes);
4448 
4449     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
4450     byte1 = (~byte1) & 0xE0;
4451     byte1 |= opc;
4452     emit_int8(byte1);
4453 
4454     int byte2 = ((~nds_enc) & 0xf) << 3;
4455     byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
4456     emit_int8(byte2);
4457   } else {
4458     prefix(VEX_2bytes);
4459 
4460     int byte1 = vex_r ? VEX_R : 0;
4461     byte1 = (~byte1) & 0x80;
4462     byte1 |= ((~nds_enc) & 0xf) << 3;
4463     byte1 |= (vector256 ? 4 : 0) | pre;
4464     emit_int8(byte1);
4465   }
4466 }
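// Sketch of the byte layout produced above (the standard VEX format):
//   3-byte form: C4 [~R ~X ~B | mmmmm] [W | ~vvvv | L | pp]
//   2-byte form: C5 [~R | ~vvvv | L | pp]
// where R/X/B extend the ModRM/SIB register fields, vvvv is the inverted
// encoding of nds, L selects 256-bit vectors and pp is the SIMD prefix
// (none/66/F3/F2).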
4467 
4468 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){










































4469   bool vex_r = (xreg_enc >= 8);
4470   bool vex_b = adr.base_needs_rex();
4471   bool vex_x = adr.index_needs_rex();
4472   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);




















4473 }
4474 
4475 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {

4476   bool vex_r = (dst_enc >= 8);
4477   bool vex_b = (src_enc >= 8);
4478   bool vex_x = false;
4479   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);























4480   return (((dst_enc & 7) << 3) | (src_enc & 7));
4481 }
4482 
4483 
4484 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {

4485   if (UseAVX > 0) {
4486     int xreg_enc = xreg->encoding();
4487     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
4488     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
4489   } else {
4490     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
4491     rex_prefix(adr, xreg, pre, opc, rex_w);
4492   }
4493 }
4494 
4495 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {

4496   int dst_enc = dst->encoding();
4497   int src_enc = src->encoding();
4498   if (UseAVX > 0) {
4499     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4500     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
4501   } else {
4502     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
4503     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
4504   }
4505 }
4506 
4507 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {























4508   InstructionMark im(this);
4509   simd_prefix(dst, dst, src, pre);
4510   emit_int8(opcode);
4511   emit_operand(dst, src);
4512 }
4513 
4514 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4515   int encode = simd_prefix_and_encode(dst, dst, src, pre);






4516   emit_int8(opcode);
4517   emit_int8((unsigned char)(0xC0 | encode));
4518 }
4519 
4520 // Versions with no second source register (non-destructive source).
4521 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {







4522   InstructionMark im(this);
4523   simd_prefix(dst, xnoreg, src, pre);
4524   emit_int8(opcode);
4525   emit_operand(dst, src);
4526 }
4527 
4528 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4529   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);






4530   emit_int8(opcode);
4531   emit_int8((unsigned char)(0xC0 | encode));
4532 }
4533 
4534 // 3-operands AVX instructions
4535 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4536                                Address src, VexSimdPrefix pre, bool vector256) {
4537   InstructionMark im(this);
4538   vex_prefix(dst, nds, src, pre, vector256);
4539   emit_int8(opcode);
4540   emit_operand(dst, src);
4541 }
4542 
4543 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4544                                XMMRegister src, VexSimdPrefix pre, bool vector256) {
4545   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);


















4546   emit_int8(opcode);
4547   emit_int8((unsigned char)(0xC0 | encode));
4548 }
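// These two emit_vex_arith helpers capture the common AVX pattern used
// throughout this file: VEX prefix, one opcode byte, then either a memory
// operand or a register-register ModRM byte (0xC0 | encode). For example,
// vaddsd above reduces to emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, false).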
4549 
4550 #ifndef _LP64
4551 
4552 void Assembler::incl(Register dst) {
4553   // Don't use it directly. Use MacroAssembler::incrementl() instead.
4554   emit_int8(0x40 | dst->encoding());
4555 }
4556 
4557 void Assembler::lea(Register dst, Address src) {
4558   leal(dst, src);
4559 }
4560 
4561 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
4562   InstructionMark im(this);
4563   emit_int8((unsigned char)0xC7);
4564   emit_operand(rax, dst);
4565   emit_data((int)imm32, rspec, 0);


5023 void Assembler::andq(Register dst, Address src) {
5024   InstructionMark im(this);
5025   prefixq(src, dst);
5026   emit_int8(0x23);
5027   emit_operand(dst, src);
5028 }
5029 
5030 void Assembler::andq(Register dst, Register src) {
5031   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5032   emit_arith(0x23, 0xC0, dst, src);
5033 }
5034 
5035 void Assembler::andnq(Register dst, Register src1, Register src2) {
5036   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5037   int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
5038   emit_int8((unsigned char)0xF2);
5039   emit_int8((unsigned char)(0xC0 | encode));
5040 }
5041 
5042 void Assembler::andnq(Register dst, Register src1, Address src2) {




5043   InstructionMark im(this);
5044   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5045   vex_prefix_0F38_q(dst, src1, src2);
5046   emit_int8((unsigned char)0xF2);
5047   emit_operand(dst, src2);
5048 }
5049 
5050 void Assembler::bsfq(Register dst, Register src) {
5051   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5052   emit_int8(0x0F);
5053   emit_int8((unsigned char)0xBC);
5054   emit_int8((unsigned char)(0xC0 | encode));
5055 }
5056 
5057 void Assembler::bsrq(Register dst, Register src) {
5058   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5059   emit_int8(0x0F);
5060   emit_int8((unsigned char)0xBD);
5061   emit_int8((unsigned char)(0xC0 | encode));
5062 }


5164   emit_arith(0x3B, 0xC0, dst, src);
5165 }
5166 
5167 void Assembler::cmpq(Register dst, Address  src) {
5168   InstructionMark im(this);
5169   prefixq(src, dst);
5170   emit_int8(0x3B);
5171   emit_operand(dst, src);
5172 }
5173 
5174 void Assembler::cmpxchgq(Register reg, Address adr) {
5175   InstructionMark im(this);
5176   prefixq(adr, reg);
5177   emit_int8(0x0F);
5178   emit_int8((unsigned char)0xB1);
5179   emit_operand(reg, adr);
5180 }
5181 
5182 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
5183   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5184   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
5185   emit_int8(0x2A);
5186   emit_int8((unsigned char)(0xC0 | encode));
5187 }
5188 
5189 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
5190   NOT_LP64(assert(VM_Version::supports_sse2(), ""));




5191   InstructionMark im(this);
5192   simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
5193   emit_int8(0x2A);
5194   emit_operand(dst, src);
5195 }
5196 
5197 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
5198   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5199   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
5200   emit_int8(0x2A);
5201   emit_int8((unsigned char)(0xC0 | encode));
5202 }
5203 
5204 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
5205   NOT_LP64(assert(VM_Version::supports_sse(), ""));




5206   InstructionMark im(this);
5207   simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
5208   emit_int8(0x2A);
5209   emit_operand(dst, src);
5210 }
5211 
5212 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
5213   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5214   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
5215   emit_int8(0x2C);
5216   emit_int8((unsigned char)(0xC0 | encode));
5217 }
5218 
5219 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
5220   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5221   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
5222   emit_int8(0x2C);
5223   emit_int8((unsigned char)(0xC0 | encode));
5224 }
5225 
5226 void Assembler::decl(Register dst) {
5227   // Don't use it directly. Use MacroAssembler::decrementl() instead.
5228   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
5229   int encode = prefix_and_encode(dst->encoding());
5230   emit_int8((unsigned char)0xFF);
5231   emit_int8((unsigned char)(0xC8 | encode));
5232 }
5233 
5234 void Assembler::decq(Register dst) {
5235   // Don't use it directly. Use MacroAssembler::decrementq() instead.
5236   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
5237   int encode = prefixq_and_encode(dst->encoding());
5238   emit_int8((unsigned char)0xFF);
5239   emit_int8(0xC8 | encode);
5240 }
5241 


5370 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
5371   InstructionMark im(this);
5372   prefix(src1);
5373   emit_int8((unsigned char)0x81);
5374   emit_operand(rax, src1, 4);
5375   emit_data((int)imm32, rspec, narrow_oop_operand);
5376 }
5377 
5378 void Assembler::lzcntq(Register dst, Register src) {
5379   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
5380   emit_int8((unsigned char)0xF3);
5381   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5382   emit_int8(0x0F);
5383   emit_int8((unsigned char)0xBD);
5384   emit_int8((unsigned char)(0xC0 | encode));
5385 }
5386 
5387 void Assembler::movdq(XMMRegister dst, Register src) {
5388   // table D-1 says MMX/SSE2
5389   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5390   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
5391   emit_int8(0x6E);
5392   emit_int8((unsigned char)(0xC0 | encode));
5393 }
5394 
5395 void Assembler::movdq(Register dst, XMMRegister src) {
5396   // table D-1 says MMX/SSE2
5397   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5398   // swap src/dst to get correct prefix
5399   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
5400   emit_int8(0x7E);
5401   emit_int8((unsigned char)(0xC0 | encode));
5402 }
5403 
5404 void Assembler::movq(Register dst, Register src) {
5405   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5406   emit_int8((unsigned char)0x8B);
5407   emit_int8((unsigned char)(0xC0 | encode));
5408 }
5409 
5410 void Assembler::movq(Register dst, Address src) {
5411   InstructionMark im(this);
5412   prefixq(src, dst);
5413   emit_int8((unsigned char)0x8B);
5414   emit_operand(dst, src);
5415 }
5416 
5417 void Assembler::movq(Address dst, Register src) {
5418   InstructionMark im(this);
5419   prefixq(dst, src);


5512   emit_int8((unsigned char)0x0F);
5513   emit_int8((unsigned char)0xB7);
5514   emit_int8((unsigned char)(0xC0 | encode));
5515 }
5516 
5517 void Assembler::mulq(Address src) {
5518   InstructionMark im(this);
5519   prefixq(src);
5520   emit_int8((unsigned char)0xF7);
5521   emit_operand(rsp, src);
5522 }
5523 
5524 void Assembler::mulq(Register src) {
5525   int encode = prefixq_and_encode(src->encoding());
5526   emit_int8((unsigned char)0xF7);
5527   emit_int8((unsigned char)(0xE0 | encode));
5528 }
5529 
5530 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
5531   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
5532   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, true, false);

5533   emit_int8((unsigned char)0xF6);
5534   emit_int8((unsigned char)(0xC0 | encode));
5535 }
5536 
5537 void Assembler::negq(Register dst) {
5538   int encode = prefixq_and_encode(dst->encoding());
5539   emit_int8((unsigned char)0xF7);
5540   emit_int8((unsigned char)(0xD8 | encode));
5541 }
5542 
5543 void Assembler::notq(Register dst) {
5544   int encode = prefixq_and_encode(dst->encoding());
5545   emit_int8((unsigned char)0xF7);
5546   emit_int8((unsigned char)(0xD0 | encode));
5547 }
5548 
5549 void Assembler::orq(Address dst, int32_t imm32) {
5550   InstructionMark im(this);
5551   prefixq(dst);
5552   emit_int8((unsigned char)0x81);


5661     emit_int8((unsigned char)(0xD0 | encode));
5662     emit_int8(imm8);
5663   }
5664 }
5665 
5666 void Assembler::rorq(Register dst, int imm8) {
5667   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5668   int encode = prefixq_and_encode(dst->encoding());
5669   if (imm8 == 1) {
5670     emit_int8((unsigned char)0xD1);
5671     emit_int8((unsigned char)(0xC8 | encode));
5672   } else {
5673     emit_int8((unsigned char)0xC1);
5674     emit_int8((unsigned char)(0xC8 | encode));
5675     emit_int8(imm8);
5676   }
5677 }
5678 
5679 void Assembler::rorxq(Register dst, Register src, int imm8) {
5680   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
5681   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, true, false);

5682   emit_int8((unsigned char)0xF0);
5683   emit_int8((unsigned char)(0xC0 | encode));
5684   emit_int8(imm8);
5685 }
5686 
5687 void Assembler::sarq(Register dst, int imm8) {
5688   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5689   int encode = prefixq_and_encode(dst->encoding());
5690   if (imm8 == 1) {
5691     emit_int8((unsigned char)0xD1);
5692     emit_int8((unsigned char)(0xF8 | encode));
5693   } else {
5694     emit_int8((unsigned char)0xC1);
5695     emit_int8((unsigned char)(0xF8 | encode));
5696     emit_int8(imm8);
5697   }
5698 }
5699 
5700 void Assembler::sarq(Register dst) {
5701   int encode = prefixq_and_encode(dst->encoding());




  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/stubRoutines.hpp"
  39 #include "utilities/macros.hpp"
  40 #if INCLUDE_ALL_GCS
  41 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  42 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
  43 #include "gc_implementation/g1/heapRegion.hpp"
  44 #endif // INCLUDE_ALL_GCS
  45 
  46 #ifdef PRODUCT
  47 #define BLOCK_COMMENT(str) /* nothing */
  48 #define STOP(error) stop(error)
  49 #else
  50 #define BLOCK_COMMENT(str) block_comment(str)
  51 #define STOP(error) block_comment(error); stop(error)
  52 #endif
  53 
  54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  55 // Implementation of AddressLiteral
  56 
  57 // A 2-D table for managing compressed displacement (disp8) on EVEX-enabled platforms.
  58 unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  59   // -----------------Table 4.5 -------------------- //
  60   16, 32, 64,  // EVEX_FV(0)
  61   4,  4,  4,   // EVEX_FV(1) - with Evex.b
  62   16, 32, 64,  // EVEX_FV(2) - with Evex.w
  63   8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  64   8,  16, 32,  // EVEX_HV(0)
  65   4,  4,  4,   // EVEX_HV(1) - with Evex.b
  66   // -----------------Table 4.6 -------------------- //
  67   16, 32, 64,  // EVEX_FVM(0)
  68   1,  1,  1,   // EVEX_T1S(0)
  69   2,  2,  2,   // EVEX_T1S(1)
  70   4,  4,  4,   // EVEX_T1S(2)
  71   8,  8,  8,   // EVEX_T1S(3)
  72   4,  4,  4,   // EVEX_T1F(0)
  73   8,  8,  8,   // EVEX_T1F(1)
  74   8,  8,  8,   // EVEX_T2(0)
  75   0,  16, 16,  // EVEX_T2(1)
  76   0,  16, 16,  // EVEX_T4(0)
  77   0,  0,  32,  // EVEX_T4(1)
  78   0,  0,  32,  // EVEX_T8(0)
  79   8,  16, 32,  // EVEX_HVM(0)
  80   4,  8,  16,  // EVEX_QVM(0)
  81   2,  4,  8,   // EVEX_OVM(0)
  82   16, 16, 16,  // EVEX_M128(0)
  83   8,  32, 64,  // EVEX_DUP(0)
  84   0,  0,  0    // EVEX_NTUP
  85 };
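// Worked example of the disp8*N compression this table drives: an EVEX_FV
// (full-vector) operand at 512-bit vector length uses N = 64, so a
// displacement of 256 compresses to the single byte 256 / 64 = 4, while a
// displacement of 260 is not a multiple of 64 and must be emitted as disp32.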
  86 
  87 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  88   _is_lval = false;
  89   _target = target;
  90   switch (rtype) {
  91   case relocInfo::oop_type:
  92   case relocInfo::metadata_type:
  93     // Oops are a special case. Normally they would be their own section
  94     // but in cases like icBuffer they are literals in the code stream that
  95     // we don't have a section for. We use none so that we get a literal address
  96     // which is always patchable.
  97     break;
  98   case relocInfo::external_word_type:
  99     _rspec = external_word_Relocation::spec(target);
 100     break;
 101   case relocInfo::internal_word_type:
 102     _rspec = internal_word_Relocation::spec(target);
 103     break;
 104   case relocInfo::opt_virtual_call_type:
 105     _rspec = opt_virtual_call_Relocation::spec();
 106     break;


 197     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 198     madr._rspec = rspec;
 199     return madr;
 200   } else {
 201     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 202     madr._rspec = rspec;
 203     return madr;
 204   }
 205 }
 206 
 207 // Implementation of Assembler
 208 
 209 int AbstractAssembler::code_fill_byte() {
 210   return (u_char)'\xF4'; // hlt
 211 }
 212 
 213 // make this go away someday
 214 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
 215   if (rtype == relocInfo::none)
 216     emit_int32(data);
 217   else
 218     emit_data(data, Relocation::spec_simple(rtype), format);
 219 }
 220 
 221 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
 222   assert(imm_operand == 0, "default format must be immediate in this file");
 223   assert(inst_mark() != NULL, "must be inside InstructionMark");
 224   if (rspec.type() !=  relocInfo::none) {
 225     #ifdef ASSERT
 226       check_relocation(rspec, format);
 227     #endif
 228     // Do not use AbstractAssembler::relocate, which is not intended for
 229     // embedded words.  Instead, relocate to the enclosing instruction.
 230 
 231     // hack. call32 is too wide for mask so use disp32
 232     if (format == call32_operand)
 233       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 234     else
 235       code_section()->relocate(inst_mark(), rspec, format);
 236   }
 237   emit_int32(data);
 238 }


 287   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 288   if (is8bit(imm32)) {
 289     emit_int8(op1 | 0x02); // set sign bit
 290     emit_operand(rm, adr, 1);
 291     emit_int8(imm32 & 0xFF);
 292   } else {
 293     emit_int8(op1);
 294     emit_operand(rm, adr, 4);
 295     emit_int32(imm32);
 296   }
 297 }
 298 
 299 
 300 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 301   assert(isByte(op1) && isByte(op2), "wrong opcode");
 302   emit_int8(op1);
 303   emit_int8(op2 | encode(dst) << 3 | encode(src));
 304 }
 305 
 306 
 307 bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
 308                                            int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
 309   int mod_idx = 0;
 310   // We will test if the displacement fits the compressed format and if so
 311   // apply the compression to the displacement iff the result is 8-bit.
 312   if (VM_Version::supports_evex() && is_evex_inst) {
 313     switch (cur_tuple_type) {
 314     case EVEX_FV:
 315       if ((cur_encoding & VEX_W) == VEX_W) {
 316         mod_idx = 2 + (((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0);
 317       } else {
 318         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 319       }
 320       break;
 321 
 322     case EVEX_HV:
 323       mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 324       break;
 325 
 326     case EVEX_FVM:
 327       break;
 328 
 329     case EVEX_T1S:
 330       switch (in_size_in_bits) {
 331       case EVEX_8bit:
 332         break;
 333 
 334       case EVEX_16bit:
 335         mod_idx = 1;
 336         break;
 337 
 338       case EVEX_32bit:
 339         mod_idx = 2;
 340         break;
 341 
 342       case EVEX_64bit:
 343         mod_idx = 3;
 344         break;
 345       }
 346       break;
 347 
 348     case EVEX_T1F:
 349     case EVEX_T2:
 350     case EVEX_T4:
 351       mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
 352       break;
 353 
 354     case EVEX_T8:
 355       break;
 356 
 357     case EVEX_HVM:
 358       break;
 359 
 360     case EVEX_QVM:
 361       break;
 362 
 363     case EVEX_OVM:
 364       break;
 365 
 366     case EVEX_M128:
 367       break;
 368 
 369     case EVEX_DUP:
 370       break;
 371 
 372     default:
 373       assert(0, "no valid evex tuple_table entry");
 374       break;
 375     }
 376 
 377     if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
 378       int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
 379       if ((disp % disp_factor) == 0) {
 380         int new_disp = disp / disp_factor;
 381         if ((-0x80 <= new_disp && new_disp < 0x80)) {
 382           disp = new_disp;
 383         }
 384       } else {
 385         return false;
 386       }
 387     }
 388   }
 389   return (-0x80 <= disp && disp < 0x80);
 390 }
 391 
 392 
 393 bool Assembler::emit_compressed_disp_byte(int &disp) {
 394   int mod_idx = 0;
 395   // We will test if the displacement fits the compressed format and if so
 396   // apply the compression to the displacement iff the result is 8-bit.
 397   if (VM_Version::supports_evex() && is_evex_instruction) {
 398     switch (tuple_type) {
 399     case EVEX_FV:
 400       if ((evex_encoding & VEX_W) == VEX_W) {
 401         mod_idx = 2 + (((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0);
 402       } else {
 403         mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 404       }
 405       break;
 406 
 407     case EVEX_HV:
 408       mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 409       break;
 410 
 411     case EVEX_FVM:
 412       break;
 413 
 414     case EVEX_T1S:
 415       switch (input_size_in_bits) {
 416       case EVEX_8bit:
 417         break;
 418 
 419       case EVEX_16bit:
 420         mod_idx = 1;
 421         break;
 422 
 423       case EVEX_32bit:
 424         mod_idx = 2;
 425         break;
 426 
 427       case EVEX_64bit:
 428         mod_idx = 3;
 429         break;
 430       }
 431       break;
 432 
 433     case EVEX_T1F:
 434     case EVEX_T2:
 435     case EVEX_T4:
 436       mod_idx = (input_size_in_bits == EVEX_64bit) ? 1 : 0;
 437       break;
 438 
 439     case EVEX_T8:
 440       break;
 441 
 442     case EVEX_HVM:
 443       break;
 444 
 445     case EVEX_QVM:
 446       break;
 447 
 448     case EVEX_OVM:
 449       break;
 450 
 451     case EVEX_M128:
 452       break;
 453 
 454     case EVEX_DUP:
 455       break;
 456 
 457     default:
 458       assert(0, "no valid evex tuple_table entry");
 459       break;
 460     }
 461 
 462     if (avx_vector_len >= AVX_128bit && avx_vector_len <= AVX_512bit) {
 463       int disp_factor = tuple_table[tuple_type + mod_idx][avx_vector_len];
 464       if ((disp % disp_factor) == 0) {
 465         int new_disp = disp / disp_factor;
 466         if (is8bit(new_disp)) {
 467           disp = new_disp;
 468         }
 469       } else {
 470         return false;
 471       }
 472     }
 473   }
 474   return is8bit(disp);
 475 }
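
     // Worked example (illustrative, not part of the original source): for an EVEX
     // full-vector (EVEX_FV) instruction with a 512-bit operand, no embedded
     // broadcast and no EVEX.W, disp_factor is 64. A displacement of 128 divides
     // evenly and the quotient 2 fits in a signed byte, so this routine rewrites
     // disp to 2 and returns true; emit_operand() below then emits a single
     // displacement byte. A displacement of 130 is not a multiple of 64, so the
     // routine returns false and a full disp32 is emitted instead.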
 476 
 477 
 478 void Assembler::emit_operand(Register reg, Register base, Register index,
 479                              Address::ScaleFactor scale, int disp,
 480                              RelocationHolder const& rspec,
 481                              int rip_relative_correction) {
 482   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
 483 
 484   // Encode the registers as needed in the fields they are used in
 485 
 486   int regenc = encode(reg) << 3;
 487   int indexenc = index->is_valid() ? encode(index) << 3 : 0;
 488   int baseenc = base->is_valid() ? encode(base) : 0;
 489 
 490   if (base->is_valid()) {
 491     if (index->is_valid()) {
 492       assert(scale != Address::no_scale, "inconsistent address");
 493       // [base + index*scale + disp]
 494       if (disp == 0 && rtype == relocInfo::none  &&
 495           base != rbp LP64_ONLY(&& base != r13)) {
 496         // [base + index*scale]
 497         // [00 reg 100][ss index base]
 498         assert(index != rsp, "illegal addressing mode");
 499         emit_int8(0x04 | regenc);
 500         emit_int8(scale << 6 | indexenc | baseenc);
 501       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 502         // [base + index*scale + imm8]
 503         // [01 reg 100][ss index base] imm8
 504         assert(index != rsp, "illegal addressing mode");
 505         emit_int8(0x44 | regenc);
 506         emit_int8(scale << 6 | indexenc | baseenc);
 507         emit_int8(disp & 0xFF);
 508       } else {
 509         // [base + index*scale + disp32]
 510         // [10 reg 100][ss index base] disp32
 511         assert(index != rsp, "illegal addressing mode");
 512         emit_int8(0x84 | regenc);
 513         emit_int8(scale << 6 | indexenc | baseenc);
 514         emit_data(disp, rspec, disp32_operand);
 515       }
 516     } else if (base == rsp LP64_ONLY(|| base == r12)) {
 517       // [rsp + disp]
 518       if (disp == 0 && rtype == relocInfo::none) {
 519         // [rsp]
 520         // [00 reg 100][00 100 100]
 521         emit_int8(0x04 | regenc);
 522         emit_int8(0x24);
 523       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 524         // [rsp + imm8]
 525         // [01 reg 100][00 100 100] disp8
 526         emit_int8(0x44 | regenc);
 527         emit_int8(0x24);
 528         emit_int8(disp & 0xFF);
 529       } else {
 530         // [rsp + imm32]
 531         // [10 reg 100][00 100 100] disp32
 532         emit_int8(0x84 | regenc);
 533         emit_int8(0x24);
 534         emit_data(disp, rspec, disp32_operand);
 535       }
 536     } else {
 537       // [base + disp]
 538       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
 539       if (disp == 0 && rtype == relocInfo::none &&
 540           base != rbp LP64_ONLY(&& base != r13)) {
 541         // [base]
 542         // [00 reg base]
 543         emit_int8(0x00 | regenc | baseenc);
 544       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 545         // [base + disp8]
 546         // [01 reg base] disp8
 547         emit_int8(0x40 | regenc | baseenc);
 548         emit_int8(disp & 0xFF);
 549       } else {
 550         // [base + disp32]
 551         // [10 reg base] disp32
 552         emit_int8(0x80 | regenc | baseenc);
 553         emit_data(disp, rspec, disp32_operand);
 554       }
 555     }
 556   } else {
 557     if (index->is_valid()) {
 558       assert(scale != Address::no_scale, "inconsistent address");
 559       // [index*scale + disp]
 560       // [00 reg 100][ss index 101] disp32
 561       assert(index != rsp, "illegal addressing mode");
 562       emit_int8(0x04 | regenc);
 563       emit_int8(scale << 6 | indexenc | 0x05);
 564       emit_data(disp, rspec, disp32_operand);


 574       // at the start of the instruction. That needs more correction here.
 575       // intptr_t disp = target - next_ip;
 576       assert(inst_mark() != NULL, "must be inside InstructionMark");
 577       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
 578       int64_t adjusted = disp;
 579       // Do rip-rel adjustment for 64bit
 580       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
 581       assert(is_simm32(adjusted),
 582              "must be 32bit offset (RIP relative address)");
 583       emit_data((int32_t) adjusted, rspec, disp32_operand);
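
           // Worked example (illustrative, not in the original source), assuming
           // disp arrived as target minus the instruction start, as the comment
           // above describes: with inst_mark() == 0x1000, the disp32 field starting
           // two bytes later at pc() == 0x1002 (opcode plus ModRM) and no trailing
           // immediate (rip_relative_correction == 0), next_ip == 0x1006. For a
           // target of 0x2000 the incoming disp is 0x1000 and the adjustment gives
           // adjusted == 0x1000 - 6 == 0xFFA, i.e. exactly target - next_ip, which
           // is what the hardware adds to the next RIP.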
 584 
 585     } else {
 586       // 32bit never did this, did everything as the rip-rel/disp code above
 587       // [disp] ABSOLUTE
 588       // [00 reg 100][00 100 101] disp32
 589       emit_int8(0x04 | regenc);
 590       emit_int8(0x25);
 591       emit_data(disp, rspec, disp32_operand);
 592     }
 593   }
 594   is_evex_instruction = false;
 595 }
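
     // Illustrative example (not part of the original source): encoding the operand
     // pair rax, [rsp + 8] (no EVEX compression in effect) goes through the
     // "[rsp + imm8]" branch above and emits three bytes, 0x44 0x24 0x08 --
     // ModRM 0x44 (mod=01, reg=rax=000, rm=100), the mandatory SIB byte 0x24
     // (scale=00, index=100=none, base=100=rsp), and the one-byte displacement 8.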
 596 
 597 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 598                              Address::ScaleFactor scale, int disp,
 599                              RelocationHolder const& rspec) {
 600   if (UseAVX > 2) {
 601     int xreg_enc = reg->encoding();
 602     if (xreg_enc > 15) {
 603       XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
 604       emit_operand((Register)new_reg, base, index, scale, disp, rspec);
 605       return;
 606     }
 607   }
 608   emit_operand((Register)reg, base, index, scale, disp, rspec);
 609 }
 610 
 611 // Secret local extension to Assembler::WhichOperand:
 612 #define end_pc_operand (_WhichOperand_limit)
 613 
 614 address Assembler::locate_operand(address inst, WhichOperand which) {
 615   // Decode the given instruction, and return the address of
 616   // an embedded 32-bit operand word.
 617 
 618   // If "which" is disp32_operand, selects the displacement portion
 619   // of an effective address specifier.
 620   // If "which" is imm64_operand, selects the trailing immediate constant.
 621   // If "which" is call32_operand, selects the displacement of a call or jump.
 622   // Caller is responsible for ensuring that there is such an operand,
 623   // and that it is 32/64 bits wide.
 624 
 625   // If "which" is end_pc_operand, find the end of the instruction.
 626 
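       // Illustrative example (not part of the original source): for a five-byte
       // "call rel32" whose 0xE8 opcode is at address p (no prefixes),
       // locate_operand(p, call32_operand) returns p + 1, the address of the rel32
       // field, while locate_operand(p, end_pc_operand) returns p + 5, the first
       // byte of the following instruction.
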
 627   address ip = inst;


 880     // First byte
 881     if ((0xFF & *inst) == VEX_3bytes) {
 882       ip++; // third byte
 883       is_64bit = ((VEX_W & *ip) == VEX_W);
 884     }
 885     ip++; // opcode
 886     // To find the end of instruction (which == end_pc_operand).
 887     switch (0xFF & *ip) {
 888     case 0x61: // pcmpestri r, r/a, #8
 889     case 0x70: // pshufd r, r/a, #8
 890     case 0x73: // psrldq r, #8
 891       tail_size = 1;  // the imm8
 892       break;
 893     default:
 894       break;
 895     }
 896     ip++; // skip opcode
 897     debug_only(has_disp32 = true); // has both kinds of operands!
 898     break;
 899 
 900   case 0x62: // EVEX_4bytes
 901     assert((UseAVX > 0), "shouldn't have EVEX prefix");
 902     assert(ip == inst+1, "no prefixes allowed");
 903     // no EVEX collisions, all instructions that have 0x62 opcodes
 904     // have EVEX versions and are subopcodes of 0x66
 905     ip++; // skip P0 and examine W in P1
 906     is_64bit = ((VEX_W & *ip) == VEX_W);
 907     ip++; // move to P2
 908     ip++; // skip P2, move to opcode
 909     // To find the end of instruction (which == end_pc_operand).
 910     switch (0xFF & *ip) {
 911     case 0x61: // pcmpestri r, r/a, #8
 912     case 0x70: // pshufd r, r/a, #8
 913     case 0x73: // psrldq r, #8
 914       tail_size = 1;  // the imm8
 915       break;
 916     default:
 917       break;
 918     }
 919     ip++; // skip opcode
 920     debug_only(has_disp32 = true); // has both kinds of operands!
 921     break;
 922 
 923   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 924   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 925   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 926   case 0xDD: // fld_d a; fst_d a; fstp_d a
 927   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 928   case 0xDF: // fild_d a; fistp_d a
 929   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 930   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 931   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 932     debug_only(has_disp32 = true);
 933     break;
 934 
 935   case 0xE8: // call rdisp32
 936   case 0xE9: // jmp  rdisp32
 937     if (which == end_pc_operand)  return ip + 4;
 938     assert(which == call32_operand, "call has no disp32 or imm");
 939     return ip;
 940 
 941   case 0xF0:                    // Lock
 942     assert(os::is_MP(), "only on MP");


1202   emit_int8(0x0F);
1203   emit_int8(0x1F);
1204   emit_int8((unsigned char)0x80);
1205                    // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
1206   emit_int32(0);   // 32-bit offset (4 bytes)
1207 }
1208 
1209 void Assembler::addr_nop_8() {
1210   assert(UseAddressNop, "no CPU support");
1211   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset
1212   emit_int8(0x0F);
1213   emit_int8(0x1F);
1214   emit_int8((unsigned char)0x84);
1215                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
1216   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1217   emit_int32(0);   // 32-bit offset (4 bytes)
1218 }
1219 
1220 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
1221   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1222   if (VM_Version::supports_evex()) {
1223     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
1224   } else {
1225     emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
1226   }
1227 }
1228 
1229 void Assembler::addsd(XMMRegister dst, Address src) {
1230   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1231   if (VM_Version::supports_evex()) {
1232     tuple_type = EVEX_T1S;
1233     input_size_in_bits = EVEX_64bit;
1234     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
1235   } else {
1236     emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
1237   }
1238 }
1239 
1240 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1241   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1242   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
1243 }
1244 
1245 void Assembler::addss(XMMRegister dst, Address src) {
1246   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1247   if (VM_Version::supports_evex()) {
1248     tuple_type = EVEX_T1S;
1249     input_size_in_bits = EVEX_32bit;
1250   }
1251   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
1252 }
1253 
1254 void Assembler::aesdec(XMMRegister dst, Address src) {
1255   assert(VM_Version::supports_aes(), "");
1256   InstructionMark im(this);
1257   simd_prefix(dst, dst, src, VEX_SIMD_66, false,
1258               VEX_OPCODE_0F_38, false, AVX_128bit, true);
1259   emit_int8((unsigned char)0xDE);
1260   emit_operand(dst, src);
1261 }
1262 
1263 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1264   assert(VM_Version::supports_aes(), "");
1265   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
1266                                       VEX_OPCODE_0F_38, false, AVX_128bit, true);
1267   emit_int8((unsigned char)0xDE);
1268   emit_int8(0xC0 | encode);
1269 }
1270 
1271 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1272   assert(VM_Version::supports_aes(), "");
1273   InstructionMark im(this);
1274   simd_prefix(dst, dst, src, VEX_SIMD_66, false,
1275               VEX_OPCODE_0F_38, false, AVX_128bit, true);
1276   emit_int8((unsigned char)0xDF);
1277   emit_operand(dst, src);
1278 }
1279 
1280 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1281   assert(VM_Version::supports_aes(), "");
1282   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
1283                                       VEX_OPCODE_0F_38, false, AVX_128bit, true);
1284   emit_int8((unsigned char)0xDF);
1285   emit_int8((unsigned char)(0xC0 | encode));
1286 }
1287 
1288 void Assembler::aesenc(XMMRegister dst, Address src) {
1289   assert(VM_Version::supports_aes(), "");
1290   InstructionMark im(this);
1291   simd_prefix(dst, dst, src, VEX_SIMD_66, false,
1292               VEX_OPCODE_0F_38, false, AVX_128bit, true);
1293   emit_int8((unsigned char)0xDC);
1294   emit_operand(dst, src);
1295 }
1296 
1297 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1298   assert(VM_Version::supports_aes(), "");
1299   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
1300                                       VEX_OPCODE_0F_38, false, AVX_128bit, true);
1301   emit_int8((unsigned char)0xDC);
1302   emit_int8(0xC0 | encode);
1303 }
1304 
1305 void Assembler::aesenclast(XMMRegister dst, Address src) {
1306   assert(VM_Version::supports_aes(), "");
1307   InstructionMark im(this);
1308   simd_prefix(dst, dst, src, VEX_SIMD_66, false,
1309               VEX_OPCODE_0F_38, false, AVX_128bit, true);
1310   emit_int8((unsigned char)0xDD);
1311   emit_operand(dst, src);
1312 }
1313 
1314 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1315   assert(VM_Version::supports_aes(), "");
1316   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
1317                                       VEX_OPCODE_0F_38, false, AVX_128bit, true);
1318   emit_int8((unsigned char)0xDD);
1319   emit_int8((unsigned char)(0xC0 | encode));
1320 }
1321 
1322 
1323 void Assembler::andl(Address dst, int32_t imm32) {
1324   InstructionMark im(this);
1325   prefix(dst);
1326   emit_int8((unsigned char)0x81);
1327   emit_operand(rsp, dst, 4);
1328   emit_int32(imm32);
1329 }
1330 
1331 void Assembler::andl(Register dst, int32_t imm32) {
1332   prefix(dst);
1333   emit_arith(0x81, 0xE0, dst, imm32);
1334 }
1335 
1336 void Assembler::andl(Register dst, Address src) {
1337   InstructionMark im(this);
1338   prefix(src, dst);
1339   emit_int8(0x23);
1340   emit_operand(dst, src);
1341 }
1342 
1343 void Assembler::andl(Register dst, Register src) {
1344   (void) prefix_and_encode(dst->encoding(), src->encoding());
1345   emit_arith(0x23, 0xC0, dst, src);
1346 }
1347 
1348 void Assembler::andnl(Register dst, Register src1, Register src2) {
1349   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1350   int encode = vex_prefix_0F38_and_encode(dst, src1, src2, false);
1351   emit_int8((unsigned char)0xF2);
1352   emit_int8((unsigned char)(0xC0 | encode));
1353 }
1354 
1355 void Assembler::andnl(Register dst, Register src1, Address src2) {
1356   InstructionMark im(this);
1357   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1358   vex_prefix_0F38(dst, src1, src2, false);
1359   emit_int8((unsigned char)0xF2);
1360   emit_operand(dst, src2);
1361 }
1362 
1363 void Assembler::bsfl(Register dst, Register src) {
1364   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1365   emit_int8(0x0F);
1366   emit_int8((unsigned char)0xBC);
1367   emit_int8((unsigned char)(0xC0 | encode));
1368 }
1369 
1370 void Assembler::bsrl(Register dst, Register src) {
1371   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1372   emit_int8(0x0F);
1373   emit_int8((unsigned char)0xBD);
1374   emit_int8((unsigned char)(0xC0 | encode));
1375 }
1376 
1377 void Assembler::bswapl(Register reg) { // bswap
1378   int encode = prefix_and_encode(reg->encoding());
1379   emit_int8(0x0F);
1380   emit_int8((unsigned char)(0xC8 | encode));
1381 }
1382 
1383 void Assembler::blsil(Register dst, Register src) {
1384   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1385   int encode = vex_prefix_0F38_and_encode(rbx, dst, src, false);
1386   emit_int8((unsigned char)0xF3);
1387   emit_int8((unsigned char)(0xC0 | encode));
1388 }
1389 
1390 void Assembler::blsil(Register dst, Address src) {
1391   InstructionMark im(this);
1392   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1393   vex_prefix_0F38(rbx, dst, src, false);
1394   emit_int8((unsigned char)0xF3);
1395   emit_operand(rbx, src);
1396 }
1397 
1398 void Assembler::blsmskl(Register dst, Register src) {
1399   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1400   int encode = vex_prefix_0F38_and_encode(rdx, dst, src, false);
1401   emit_int8((unsigned char)0xF3);
1402   emit_int8((unsigned char)(0xC0 | encode));
1403 }
1404 
1405 void Assembler::blsmskl(Register dst, Address src) {
1406   InstructionMark im(this);
1407   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1408   vex_prefix_0F38(rdx, dst, src, false);
1409   emit_int8((unsigned char)0xF3);
1410   emit_operand(rdx, src);
1411 }
1412 
1413 void Assembler::blsrl(Register dst, Register src) {
1414   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1415   int encode = vex_prefix_0F38_and_encode(rcx, dst, src, false);
1416   emit_int8((unsigned char)0xF3);
1417   emit_int8((unsigned char)(0xC0 | encode));
1418 }
1419 
1420 void Assembler::blsrl(Register dst, Address src) {
1421   InstructionMark im(this);
1422   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1423   vex_prefix_0F38(rcx, dst, src, false);
1424   emit_int8((unsigned char)0xF3);
1425   emit_operand(rcx, src);
1426 }
1427 
1428 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1429   // suspect disp32 is always good
1430   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1431 
1432   if (L.is_bound()) {
1433     const int long_size = 5;
1434     int offs = (int)( target(L) - pc() );
1435     assert(offs <= 0, "assembler error");
1436     InstructionMark im(this);
1437     // 1110 1000 #32-bit disp
1438     emit_int8((unsigned char)0xE8);
1439     emit_data(offs - long_size, rtype, operand);
1440   } else {
1441     InstructionMark im(this);
1442     // 1110 1000 #32-bit disp
1443     L.add_patch_at(code(), locator());


1551   emit_int8(0x0F);
1552   emit_int8((unsigned char)0xB1);
1553   emit_operand(reg, adr);
1554 }
1555 
1556 // The 8-bit cmpxchg compares the value at adr with the contents of rax,
1557 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax.
1558 // The ZF is set if the compared values were equal, and cleared otherwise.
1559 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1560   InstructionMark im(this);
1561   prefix(adr, reg, true);
1562   emit_int8(0x0F);
1563   emit_int8((unsigned char)0xB0);
1564   emit_operand(reg, adr);
1565 }
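
     // Illustrative usage (not part of the original source): a caller loads the
     // expected byte value into rax and then emits something like
     //
     //   lock();                    // make the exchange atomic on MP systems
     //   cmpxchgb(new_val, field);  // new_val: Register, field: Address
     //
     // and afterwards tests ZF (e.g. with jcc) to see whether the swap happened.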
1566 
1567 void Assembler::comisd(XMMRegister dst, Address src) {
1568   // NOTE: dbx seems to decode this as comiss even though the
1569 // 0x66 is there. Strangely, ucomisd comes out correct
1570   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1571   if (VM_Version::supports_evex()) {
1572     tuple_type = EVEX_T1S;
1573     input_size_in_bits = EVEX_64bit;
1574     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
1575   } else {
1576     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1577   }
1578 }
1579 
1580 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1581   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1582   if (VM_Version::supports_evex()) {
1583     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
1584   } else {
1585     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1586   }
1587 }
1588 
1589 void Assembler::comiss(XMMRegister dst, Address src) {
1590   if (VM_Version::supports_evex()) {
1591     tuple_type = EVEX_T1S;
1592     input_size_in_bits = EVEX_32bit;
1593   }
1594   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1595   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
1596 }
1597 
1598 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1599   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1600   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
1601 }
1602 
1603 void Assembler::cpuid() {
1604   emit_int8(0x0F);
1605   emit_int8((unsigned char)0xA2);
1606 }
1607 
1608 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1609   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1610   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
1611 }
1612 
1613 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1614   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1615   emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
1616 }
1617 
1618 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1619   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1620   if (VM_Version::supports_evex()) {
1621     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1622   } else {
1623     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1624   }
1625 }
1626 
1627 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1628   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1629   if (VM_Version::supports_evex()) {
1630     tuple_type = EVEX_T1F;
1631     input_size_in_bits = EVEX_64bit;
1632     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1633   } else {
1634     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1635   }
1636 }
1637 
1638 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1639   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1640   int encode = 0;
1641   if (VM_Version::supports_evex()) {
1642     encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
1643   } else {
1644     encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, false);
1645   }
1646   emit_int8(0x2A);
1647   emit_int8((unsigned char)(0xC0 | encode));
1648 }
1649 
1650 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1651   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1652   if (VM_Version::supports_evex()) {
1653     tuple_type = EVEX_T1S;
1654     input_size_in_bits = EVEX_32bit;
1655     emit_simd_arith_q(0x2A, dst, src, VEX_SIMD_F2, true);
1656   } else {
1657     emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
1658   }
1659 }
1660 
1661 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1662   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1663   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, true);
1664   emit_int8(0x2A);
1665   emit_int8((unsigned char)(0xC0 | encode));
1666 }
1667 
1668 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1669   if (VM_Version::supports_evex()) {
1670     tuple_type = EVEX_T1S;
1671     input_size_in_bits = EVEX_32bit;
1672   }
1673   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1674   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, true);
1675 }
1676 
1677 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1678   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1679   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1680 }
1681 
1682 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1683   if (VM_Version::supports_evex()) {
1684     tuple_type = EVEX_T1S;
1685     input_size_in_bits = EVEX_32bit;
1686   }
1687   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1688   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1689 }
1690 
1691 
1692 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1693   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1694   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
1695   emit_int8(0x2C);
1696   emit_int8((unsigned char)(0xC0 | encode));
1697 }
1698 
1699 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1700   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1701   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
1702   emit_int8(0x2C);
1703   emit_int8((unsigned char)(0xC0 | encode));
1704 }
1705 
1706 void Assembler::decl(Address dst) {
1707   // Don't use it directly. Use MacroAssembler::decrement() instead.
1708   InstructionMark im(this);
1709   prefix(dst);
1710   emit_int8((unsigned char)0xFF);
1711   emit_operand(rcx, dst);
1712 }
1713 
1714 void Assembler::divsd(XMMRegister dst, Address src) {
1715   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1716   if (VM_Version::supports_evex()) {
1717     tuple_type = EVEX_T1S;
1718     input_size_in_bits = EVEX_64bit;
1719     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1720   } else {
1721     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1722   }
1723 }
1724 
1725 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1726   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1727   if (VM_Version::supports_evex()) {
1728     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1729   } else {
1730     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1731   }
1732 }
1733 
1734 void Assembler::divss(XMMRegister dst, Address src) {
1735   if (VM_Version::supports_evex()) {
1736     tuple_type = EVEX_T1S;
1737     input_size_in_bits = EVEX_32bit;
1738   }
1739   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1740   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1741 }
1742 
1743 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1744   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1745   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1746 }
1747 
1748 void Assembler::emms() {
1749   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1750   emit_int8(0x0F);
1751   emit_int8(0x77);
1752 }
1753 
1754 void Assembler::hlt() {
1755   emit_int8((unsigned char)0xF4);
1756 }
1757 
1758 void Assembler::idivl(Register src) {


1971   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1972   emit_int8(0x0F);
1973   emit_int8((unsigned char)0xBD);
1974   emit_int8((unsigned char)(0xC0 | encode));
1975 }
1976 
1977 // Emit mfence instruction
1978 void Assembler::mfence() {
1979   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1980   emit_int8(0x0F);
1981   emit_int8((unsigned char)0xAE);
1982   emit_int8((unsigned char)0xF0);
1983 }
1984 
1985 void Assembler::mov(Register dst, Register src) {
1986   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1987 }
1988 
1989 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1990   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1991   if (VM_Version::supports_evex()) {
1992     emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66, true);
1993   } else {
1994     emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
1995   }
1996 }
1997 
1998 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1999   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2000   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
2001 }
2002 
2003 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2004   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2005   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, true, VEX_OPCODE_0F,
2006                                       false, AVX_128bit);
2007   emit_int8(0x16);
2008   emit_int8((unsigned char)(0xC0 | encode));
2009 }
2010 
2011 void Assembler::movb(Register dst, Address src) {
2012   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2013   InstructionMark im(this);
2014   prefix(src, dst, true);
2015   emit_int8((unsigned char)0x8A);
2016   emit_operand(dst, src);
2017 }
2018 
2019 void Assembler::kmovq(KRegister dst, KRegister src) {
2020   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2021   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
2022                                       true, VEX_OPCODE_0F, true);
2023   emit_int8((unsigned char)0x90);
2024   emit_int8((unsigned char)(0xC0 | encode));
2025 }
2026 
2027 void Assembler::kmovq(KRegister dst, Address src) {
2028   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2029   int dst_enc = dst->encoding();
2030   int nds_enc = 0;
2031   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
2032              VEX_OPCODE_0F, true, AVX_128bit, true, true);
2033   emit_int8((unsigned char)0x90);
2034   emit_operand((Register)dst, src);
2035 }
2036 
2037 void Assembler::kmovq(Address dst, KRegister src) {
2038   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2039   int src_enc = src->encoding();
2040   int nds_enc = 0;
2041   vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
2042              VEX_OPCODE_0F, true, AVX_128bit, true, true);
2043   emit_int8((unsigned char)0x90);
2044   emit_operand((Register)src, dst);
2045 }
2046 
2047 void Assembler::kmovql(KRegister dst, Register src) {
2048   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2049   bool supports_bw = VM_Version::supports_avx512bw();
2050   VexSimdPrefix pre = supports_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2051   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true,
2052                                       VEX_OPCODE_0F, supports_bw);
2053   emit_int8((unsigned char)0x92);
2054   emit_int8((unsigned char)(0xC0 | encode));
2055 }
2056 
2057 void Assembler::kmovdl(KRegister dst, Register src) {
2058   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2059   VexSimdPrefix pre = VM_Version::supports_avx512bw() ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2060   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true, VEX_OPCODE_0F, false);
2061   emit_int8((unsigned char)0x92);
2062   emit_int8((unsigned char)(0xC0 | encode));
2063 }
2064 
2065 void Assembler::movb(Address dst, int imm8) {
2066   InstructionMark im(this);
2067   prefix(dst);
2068   emit_int8((unsigned char)0xC6);
2069   emit_operand(rax, dst, 1);
2070   emit_int8(imm8);
2071 }
2072 
2073 
2074 void Assembler::movb(Address dst, Register src) {
2075   assert(src->has_byte_register(), "must have byte register");
2076   InstructionMark im(this);
2077   prefix(dst, src, true);
2078   emit_int8((unsigned char)0x88);
2079   emit_operand(src, dst);
2080 }
2081 
2082 void Assembler::movdl(XMMRegister dst, Register src) {
2083   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2084   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, true);
2085   emit_int8(0x6E);
2086   emit_int8((unsigned char)(0xC0 | encode));
2087 }
2088 
2089 void Assembler::movdl(Register dst, XMMRegister src) {
2090   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2091   // swap src/dst to get correct prefix
2092   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, true);
2093   emit_int8(0x7E);
2094   emit_int8((unsigned char)(0xC0 | encode));
2095 }
2096 
2097 void Assembler::movdl(XMMRegister dst, Address src) {
2098   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2099   if (VM_Version::supports_evex()) {
2100     tuple_type = EVEX_T1S;
2101     input_size_in_bits = EVEX_32bit;
2102   }
2103   InstructionMark im(this);
2104   simd_prefix(dst, src, VEX_SIMD_66, true, VEX_OPCODE_0F);
2105   emit_int8(0x6E);
2106   emit_operand(dst, src);
2107 }
2108 
2109 void Assembler::movdl(Address dst, XMMRegister src) {
2110   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2111   if (VM_Version::supports_evex()) {
2112     tuple_type = EVEX_T1S;
2113     input_size_in_bits = EVEX_32bit;
2114   }
2115   InstructionMark im(this);
2116   simd_prefix(dst, src, VEX_SIMD_66, true);
2117   emit_int8(0x7E);
2118   emit_operand(src, dst);
2119 }
2120 
2121 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2122   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2123   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2124 }
2125 
2126 void Assembler::movdqa(XMMRegister dst, Address src) {
2127   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2128   if (VM_Version::supports_evex()) {
2129     tuple_type = EVEX_FVM;
2130   }
2131   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2132 }
2133 
2134 void Assembler::movdqu(XMMRegister dst, Address src) {
2135   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2136   if (VM_Version::supports_evex()) {
2137     tuple_type = EVEX_FVM;
2138   }
2139   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2140 }
2141 
2142 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2143   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2144   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2145 }
2146 
2147 void Assembler::movdqu(Address dst, XMMRegister src) {
2148   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2149   if (VM_Version::supports_evex()) {
2150     tuple_type = EVEX_FVM;
2151   }
2152   InstructionMark im(this);
2153   simd_prefix(dst, src, VEX_SIMD_F3, false);
2154   emit_int8(0x7F);
2155   emit_operand(src, dst);
2156 }
2157 
2158 // Move Unaligned 256bit Vector
2159 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2160   assert(UseAVX > 0, "");
2161   if (VM_Version::supports_evex()) {
2162     tuple_type = EVEX_FVM;
2163   }
2164   int vector_len = AVX_256bit;
2165   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2166   emit_int8(0x6F);
2167   emit_int8((unsigned char)(0xC0 | encode));
2168 }
2169 
2170 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2171   assert(UseAVX > 0, "");
2172   if (VM_Version::supports_evex()) {
2173     tuple_type = EVEX_FVM;
2174   }
2175   InstructionMark im(this);
2176   int vector_len = AVX_256bit;
2177   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
2178   emit_int8(0x6F);
2179   emit_operand(dst, src);
2180 }
2181 
2182 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2183   assert(UseAVX > 0, "");
2184   if (VM_Version::supports_evex()) {
2185     tuple_type = EVEX_FVM;
2186   }
2187   InstructionMark im(this);
2188   int vector_len = AVX_256bit;
2189   // swap src<->dst for encoding
2190   assert(src != xnoreg, "sanity");
2191   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
2192   emit_int8(0x7F);
2193   emit_operand(src, dst);
2194 }
2195 
2196 // Move Unaligned EVEX-enabled Vector (programmable: 8, 16, 32, 64)
2197 void Assembler::evmovdqu(XMMRegister dst, XMMRegister src, int vector_len) {
2198   assert(UseAVX > 0, "");
2199   int src_enc = src->encoding();
2200   int dst_enc = dst->encoding();
2201   int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
2202                                      true, vector_len, false, false);
2203   emit_int8(0x6F);
2204   emit_int8((unsigned char)(0xC0 | encode));
2205 }
2206 
2207 void Assembler::evmovdqu(XMMRegister dst, Address src, int vector_len) {
2208   assert(UseAVX > 0, "");
2209   InstructionMark im(this);
2210   if (VM_Version::supports_evex()) {
2211     tuple_type = EVEX_FVM;
2212     vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
2213   } else {
2214     vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
2215   }
2216   emit_int8(0x6F);
2217   emit_operand(dst, src);
2218 }
2219 
2220 void Assembler::evmovdqu(Address dst, XMMRegister src, int vector_len) {
2221   assert(UseAVX > 0, "");
2222   InstructionMark im(this);
2223   assert(src != xnoreg, "sanity");
2224   if (VM_Version::supports_evex()) {
2225     tuple_type = EVEX_FVM;
2226     // swap src<->dst for encoding
2227     vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
2228   } else {
2229     // swap src<->dst for encoding
2230     vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
2231   }
2232   emit_int8(0x7F);
2233   emit_operand(src, dst);
2234 }
2235 
2236 // Uses zero extension on 64bit
2237 
2238 void Assembler::movl(Register dst, int32_t imm32) {
2239   int encode = prefix_and_encode(dst->encoding());
2240   emit_int8((unsigned char)(0xB8 | encode));
2241   emit_int32(imm32);
2242 }
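
     // Illustration (not in the original source): because 32-bit register writes
     // zero the upper half on x86-64, movl(rax, -1) leaves rax == 0x00000000FFFFFFFF;
     // movq would be needed to set all 64 bits.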
2243 
2244 void Assembler::movl(Register dst, Register src) {
2245   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2246   emit_int8((unsigned char)0x8B);
2247   emit_int8((unsigned char)(0xC0 | encode));
2248 }
2249 
2250 void Assembler::movl(Register dst, Address src) {
2251   InstructionMark im(this);


2257 void Assembler::movl(Address dst, int32_t imm32) {
2258   InstructionMark im(this);
2259   prefix(dst);
2260   emit_int8((unsigned char)0xC7);
2261   emit_operand(rax, dst, 4);
2262   emit_int32(imm32);
2263 }
2264 
2265 void Assembler::movl(Address dst, Register src) {
2266   InstructionMark im(this);
2267   prefix(dst, src);
2268   emit_int8((unsigned char)0x89);
2269   emit_operand(src, dst);
2270 }
2271 
2272 // Newer CPUs require the use of movsd and movss to avoid a partial register stall
2273 // when loading from memory. But for old Opteron, use movlpd instead of movsd.
2274 // The selection is done in MacroAssembler::movdbl() and movflt().
2275 void Assembler::movlpd(XMMRegister dst, Address src) {
2276   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2277   if (VM_Version::supports_evex()) {
2278     tuple_type = EVEX_T1S;
2279     input_size_in_bits = EVEX_32bit;
2280   }
2281   emit_simd_arith(0x12, dst, src, VEX_SIMD_66, true);
2282 }
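
     // Sketch (illustrative, not the original source) of the kind of selection the
     // comment above refers to; the authoritative version lives in
     // MacroAssembler::movdbl()/movflt(), and the flag name here is an assumption:
     //
     //   void MacroAssembler::movdbl(XMMRegister dst, Address src) {
     //     if (UseXmmLoadAndClearUpper) { movsd(dst, src);  }   // newer CPUs
     //     else                         { movlpd(dst, src); }   // old Opteron
     //   }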
2283 
2284 void Assembler::movq( MMXRegister dst, Address src ) {
2285   assert( VM_Version::supports_mmx(), "" );
2286   emit_int8(0x0F);
2287   emit_int8(0x6F);
2288   emit_operand(dst, src);
2289 }
2290 
2291 void Assembler::movq( Address dst, MMXRegister src ) {
2292   assert( VM_Version::supports_mmx(), "" );
2293   emit_int8(0x0F);
2294   emit_int8(0x7F);
2295   // workaround gcc (3.2.1-7a) bug
2296   // In that version of gcc, with only an emit_operand(MMX, Address)
2297   // available, gcc will tail-jump and try to reverse the parameters,
2298   // completely obliterating dst in the process. By having a version
2299   // available that doesn't need to swap the args at the tail jump,
2300   // the bug is avoided.
2301   emit_operand(dst, src);
2302 }
2303 
2304 void Assembler::movq(XMMRegister dst, Address src) {
2305   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2306   InstructionMark im(this);
2307   if (VM_Version::supports_evex()) {
2308     tuple_type = EVEX_T1S;
2309     input_size_in_bits = EVEX_64bit;
2310     simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, true);
2311   } else {
2312     simd_prefix(dst, src, VEX_SIMD_F3, true, VEX_OPCODE_0F);
2313   }
2314   emit_int8(0x7E);
2315   emit_operand(dst, src);
2316 }
2317 
2318 void Assembler::movq(Address dst, XMMRegister src) {
2319   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2320   InstructionMark im(this);
2321   if (VM_Version::supports_evex()) {
2322     tuple_type = EVEX_T1S;
2323     input_size_in_bits = EVEX_64bit;
2324     simd_prefix(src, xnoreg, dst, VEX_SIMD_66, true,
2325                 VEX_OPCODE_0F, true, AVX_128bit);
2326   } else {
2327     simd_prefix(dst, src, VEX_SIMD_66, true);
2328   }
2329   emit_int8((unsigned char)0xD6);
2330   emit_operand(src, dst);
2331 }
2332 
2333 void Assembler::movsbl(Register dst, Address src) { // movsxb
2334   InstructionMark im(this);
2335   prefix(src, dst);
2336   emit_int8(0x0F);
2337   emit_int8((unsigned char)0xBE);
2338   emit_operand(dst, src);
2339 }
2340 
2341 void Assembler::movsbl(Register dst, Register src) { // movsxb
2342   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2343   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
2344   emit_int8(0x0F);
2345   emit_int8((unsigned char)0xBE);
2346   emit_int8((unsigned char)(0xC0 | encode));
2347 }
2348 
2349 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2350   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2351   if (VM_Version::supports_evex()) {
2352     emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, true);
2353   } else {
2354     emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
2355   }
2356 }
2357 
2358 void Assembler::movsd(XMMRegister dst, Address src) {
2359   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2360   if (VM_Version::supports_evex()) {
2361     tuple_type = EVEX_T1S;
2362     input_size_in_bits = EVEX_64bit;
2363     emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, true);
2364   } else {
2365     emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
2366   }
2367 }
2368 
2369 void Assembler::movsd(Address dst, XMMRegister src) {
2370   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2371   InstructionMark im(this);
2372   if (VM_Version::supports_evex()) {
2373     tuple_type = EVEX_T1S;
2374     input_size_in_bits = EVEX_64bit;
2375     simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
2376   } else {
2377     simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, false);
2378   }
2379   emit_int8(0x11);
2380   emit_operand(src, dst);
2381 }
2382 
2383 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2384   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2385   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, true);
2386 }
2387 
2388 void Assembler::movss(XMMRegister dst, Address src) {
2389   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2390   if (VM_Version::supports_evex()) {
2391     tuple_type = EVEX_T1S;
2392     input_size_in_bits = EVEX_32bit;
2393   }
2394   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, true);
2395 }
2396 
2397 void Assembler::movss(Address dst, XMMRegister src) {
2398   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2399   if (VM_Version::supports_evex()) {
2400     tuple_type = EVEX_T1S;
2401     input_size_in_bits = EVEX_32bit;
2402   }
2403   InstructionMark im(this);
2404   simd_prefix(dst, src, VEX_SIMD_F3, false);
2405   emit_int8(0x11);
2406   emit_operand(src, dst);
2407 }
2408 
2409 void Assembler::movswl(Register dst, Address src) { // movsxw
2410   InstructionMark im(this);
2411   prefix(src, dst);
2412   emit_int8(0x0F);
2413   emit_int8((unsigned char)0xBF);
2414   emit_operand(dst, src);
2415 }
2416 
2417 void Assembler::movswl(Register dst, Register src) { // movsxw
2418   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2419   emit_int8(0x0F);
2420   emit_int8((unsigned char)0xBF);
2421   emit_int8((unsigned char)(0xC0 | encode));
2422 }
2423 
2424 void Assembler::movw(Address dst, int imm16) {


2476   emit_int8(0x0F);
2477   emit_int8((unsigned char)0xB7);
2478   emit_int8(0xC0 | encode);
2479 }
2480 
2481 void Assembler::mull(Address src) {
2482   InstructionMark im(this);
2483   prefix(src);
2484   emit_int8((unsigned char)0xF7);
2485   emit_operand(rsp, src);
2486 }
2487 
2488 void Assembler::mull(Register src) {
2489   int encode = prefix_and_encode(src->encoding());
2490   emit_int8((unsigned char)0xF7);
2491   emit_int8((unsigned char)(0xE0 | encode));
2492 }
2493 
2494 void Assembler::mulsd(XMMRegister dst, Address src) {
2495   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2496   if (VM_Version::supports_evex()) {
2497     tuple_type = EVEX_T1S;
2498     input_size_in_bits = EVEX_64bit;
2499     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2500   } else {
2501     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2502   }
2503 }
2504 
2505 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2506   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2507   if (VM_Version::supports_evex()) {
2508     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2509   } else {
2510     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2511   }
2512 }
2513 
2514 void Assembler::mulss(XMMRegister dst, Address src) {
2515   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2516   if (VM_Version::supports_evex()) {
2517     tuple_type = EVEX_T1S;
2518     input_size_in_bits = EVEX_32bit;
2519   }
2520   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2521 }
2522 
2523 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2524   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2525   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2526 }
2527 
2528 void Assembler::negl(Register dst) {
2529   int encode = prefix_and_encode(dst->encoding());
2530   emit_int8((unsigned char)0xF7);
2531   emit_int8((unsigned char)(0xD8 | encode));
2532 }
2533 
2534 void Assembler::nop(int i) {
2535 #ifdef ASSERT
2536   assert(i > 0, " ");
2537   // The fancy nops aren't currently recognized by debuggers, making it a
2538   // pain to disassemble code while debugging. If asserts are on, clearly
2539   // speed is not an issue, so simply use the single-byte traditional nop


2799 void Assembler::orl(Register dst, int32_t imm32) {
2800   prefix(dst);
2801   emit_arith(0x81, 0xC8, dst, imm32);
2802 }
2803 
2804 void Assembler::orl(Register dst, Address src) {
2805   InstructionMark im(this);
2806   prefix(src, dst);
2807   emit_int8(0x0B);
2808   emit_operand(dst, src);
2809 }
2810 
2811 void Assembler::orl(Register dst, Register src) {
2812   (void) prefix_and_encode(dst->encoding(), src->encoding());
2813   emit_arith(0x0B, 0xC0, dst, src);
2814 }
2815 
2816 void Assembler::packuswb(XMMRegister dst, Address src) {
2817   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2818   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2819   if (VM_Version::supports_evex()) {
2820     tuple_type = EVEX_FV;
2821     input_size_in_bits = EVEX_32bit;
2822   }
2823   emit_simd_arith(0x67, dst, src, VEX_SIMD_66,
2824                   false, (VM_Version::supports_avx512dq() == false));
2825 }
2826 
2827 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2828   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2829   emit_simd_arith(0x67, dst, src, VEX_SIMD_66,
2830                   false, (VM_Version::supports_avx512dq() == false));
2831 }
2832 
2833 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
2834   assert(UseAVX > 0, "some form of AVX must be enabled");
2835   emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len,
2836                  false, (VM_Version::supports_avx512dq() == false));
2837 }
2838 
2839 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
2840   assert(VM_Version::supports_avx2(), "");
2841   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
2842                                       VEX_OPCODE_0F_3A, true, vector_len);
2843   emit_int8(0x00);
2844   emit_int8(0xC0 | encode);
2845   emit_int8(imm8);
2846 }
2847 
2848 void Assembler::pause() {
2849   emit_int8((unsigned char)0xF3);
2850   emit_int8((unsigned char)0x90);
2851 }
2852 
2853 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2854   assert(VM_Version::supports_sse4_2(), "");
2855   InstructionMark im(this);
2856   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_3A,
2857               false, AVX_128bit, true);
2858   emit_int8(0x61);
2859   emit_operand(dst, src);
2860   emit_int8(imm8);
2861 }
2862 
2863 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2864   assert(VM_Version::supports_sse4_2(), "");
2865   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
2866                                       VEX_OPCODE_0F_3A, false, AVX_128bit, true);
2867   emit_int8(0x61);
2868   emit_int8((unsigned char)(0xC0 | encode));
2869   emit_int8(imm8);
2870 }
2871 
2872 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
2873   assert(VM_Version::supports_sse4_1(), "");
2874   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
2875                                       false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
2876   emit_int8(0x16);
2877   emit_int8((unsigned char)(0xC0 | encode));
2878   emit_int8(imm8);
2879 }
2880 
2881 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
2882   assert(VM_Version::supports_sse4_1(), "");
2883   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
2884                                       false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
2885   emit_int8(0x16);
2886   emit_int8((unsigned char)(0xC0 | encode));
2887   emit_int8(imm8);
2888 }
2889 
2890 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
2891   assert(VM_Version::supports_sse4_1(), "");
2892   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
2893                                       false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
2894   emit_int8(0x22);
2895   emit_int8((unsigned char)(0xC0 | encode));
2896   emit_int8(imm8);
2897 }
2898 
2899 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
2900   assert(VM_Version::supports_sse4_1(), "");
2901   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
2902                                       false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
2903   emit_int8(0x22);
2904   emit_int8((unsigned char)(0xC0 | encode));
2905   emit_int8(imm8);
2906 }
2907 
2908 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
2909   assert(VM_Version::supports_sse4_1(), "");
2910   if (VM_Version::supports_evex()) {
2911     tuple_type = EVEX_HVM;
2912   }
2913   InstructionMark im(this);
2914   simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
2915   emit_int8(0x30);
2916   emit_operand(dst, src);
2917 }
2918 
2919 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2920   assert(VM_Version::supports_sse4_1(), "");
2921   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
2922   emit_int8(0x30);
2923   emit_int8((unsigned char)(0xC0 | encode));
2924 }
2925 
2926 // generic
2927 void Assembler::pop(Register dst) {
2928   int encode = prefix_and_encode(dst->encoding());
2929   emit_int8(0x58 | encode);
2930 }
2931 
2932 void Assembler::popcntl(Register dst, Address src) {
2933   assert(VM_Version::supports_popcnt(), "must support");
2934   InstructionMark im(this);
2935   emit_int8((unsigned char)0xF3);
2936   prefix(src, dst);
2937   emit_int8(0x0F);
2938   emit_int8((unsigned char)0xB8);
2939   emit_operand(dst, src);
2940 }
2941 


3004   InstructionMark im(this);
3005   prefetch_prefix(src);
3006   emit_int8(0x18);
3007   emit_operand(rbx, src); // 3, src
3008 }
3009 
3010 void Assembler::prefetchw(Address src) {
3011   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3012   InstructionMark im(this);
3013   prefetch_prefix(src);
3014   emit_int8(0x0D);
3015   emit_operand(rcx, src); // 1, src
3016 }
3017 
3018 void Assembler::prefix(Prefix p) {
3019   emit_int8(p);
3020 }
3021 
3022 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
3023   assert(VM_Version::supports_ssse3(), "");
3024   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38,
3025                                       false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
3026   emit_int8(0x00);
3027   emit_int8((unsigned char)(0xC0 | encode));
3028 }
3029 
3030 void Assembler::pshufb(XMMRegister dst, Address src) {
3031   assert(VM_Version::supports_ssse3(), "");
3032   if (VM_Version::supports_evex()) {
3033     tuple_type = EVEX_FVM;
3034   }
3035   InstructionMark im(this);
3036   simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38,
3037               false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
3038   emit_int8(0x00);
3039   emit_operand(dst, src);
3040 }
3041 
3042 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
3043   assert(isByte(mode), "invalid value");
3044   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3045   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
3046   emit_int8(mode & 0xFF);
3047 
3048 }
3049 
3050 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
3051   assert(isByte(mode), "invalid value");
3052   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3053   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3054   if (VM_Version::supports_evex()) {
3055     tuple_type = EVEX_FV;
3056     input_size_in_bits = EVEX_32bit;
3057   }
3058   InstructionMark im(this);
3059   simd_prefix(dst, src, VEX_SIMD_66, false);
3060   emit_int8(0x70);
3061   emit_operand(dst, src);
3062   emit_int8(mode & 0xFF);
3063 }
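// Illustrative note (hypothetical usage; xmm0/xmm1 are assumed example
// registers): the pshufd mode byte packs four 2-bit source selectors, one per
// destination dword (bits 1:0 choose dst[0], bits 3:2 choose dst[1], and so on).
// A caller broadcasting the low dword of xmm1 into every lane of xmm0 could use
//   pshufd(xmm0, xmm1, 0x00);   // all selectors = 0
// while mode 0x1B (00 01 10 11) would reverse the four dwords.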
3064 
3065 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
3066   assert(isByte(mode), "invalid value");
3067   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3068   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, false,
3069                         (VM_Version::supports_avx512bw() == false));
3070   emit_int8(mode & 0xFF);
3071 }
3072 
3073 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
3074   assert(isByte(mode), "invalid value");
3075   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3076   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3077   if (VM_Version::supports_evex()) {
3078     tuple_type = EVEX_FVM;
3079   }
3080   InstructionMark im(this);
3081   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, false, VEX_OPCODE_0F,
3082               false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
3083   emit_int8(0x70);
3084   emit_operand(dst, src);
3085   emit_int8(mode & 0xFF);
3086 }
3087 
3088 void Assembler::psrldq(XMMRegister dst, int shift) {
3089   // Shift the 128-bit value in an xmm register by a number of bytes.
3090   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3091   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F,
3092                                       false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
3093   emit_int8(0x73);
3094   emit_int8((unsigned char)(0xC0 | encode));
3095   emit_int8(shift);
3096 }
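// Illustrative sketch (hypothetical caller, assumed registers): psrldq takes a
// byte count for the whole 128-bit register, so shifting xmm0 right by one
// dword is
//   psrldq(xmm0, 4);
// whereas psrlq(xmm0, 32) would shift each 64-bit lane right by 32 bits.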
3097 
3098 void Assembler::ptest(XMMRegister dst, Address src) {
3099   assert(VM_Version::supports_sse4_1(), "");
3100   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3101   InstructionMark im(this);
3102   simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
3103   emit_int8(0x17);
3104   emit_operand(dst, src);
3105 }
3106 
3107 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
3108   assert(VM_Version::supports_sse4_1(), "");
3109   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
3110                                       false, VEX_OPCODE_0F_38);
3111   emit_int8(0x17);
3112   emit_int8((unsigned char)(0xC0 | encode));
3113 }
3114 
3115 void Assembler::vptest(XMMRegister dst, Address src) {
3116   assert(VM_Version::supports_avx(), "");
3117   InstructionMark im(this);
3118   int vector_len = AVX_256bit;
3119   assert(dst != xnoreg, "sanity");
3120   int dst_enc = dst->encoding();
3121   // swap src<->dst for encoding
3122   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
3123   emit_int8(0x17);
3124   emit_operand(dst, src);
3125 }
3126 
3127 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
3128   assert(VM_Version::supports_avx(), "");
3129   int vector_len = AVX_256bit;
3130   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
3131                                      vector_len, VEX_OPCODE_0F_38);
3132   emit_int8(0x17);
3133   emit_int8((unsigned char)(0xC0 | encode));
3134 }
3135 
3136 void Assembler::punpcklbw(XMMRegister dst, Address src) {
3137   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3138   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3139   if (VM_Version::supports_evex()) {
3140     tuple_type = EVEX_FVM;
3141   }
3142   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
3143 }
3144 
3145 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3146   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3147   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
3148 }
3149 
3150 void Assembler::punpckldq(XMMRegister dst, Address src) {
3151   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3152   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3153   if (VM_Version::supports_evex()) {
3154     tuple_type = EVEX_FV;
3155     input_size_in_bits = EVEX_32bit;
3156   }
3157   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3158 }
3159 
3160 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
3161   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3162   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3163 }
3164 
3165 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
3166   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3167   emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
3168 }
3169 
3170 void Assembler::push(int32_t imm32) {
3171   // in 64-bit mode we push 64 bits onto the stack but only
3172   // take a 32-bit immediate
3173   emit_int8(0x68);
3174   emit_int32(imm32);
3175 }
3176 


3346   assert(isShiftCount(imm8), "illegal shift count");
3347   int encode = prefix_and_encode(dst->encoding());
3348   emit_int8((unsigned char)0xC1);
3349   emit_int8((unsigned char)(0xE8 | encode));
3350   emit_int8(imm8);
3351 }
3352 
3353 void Assembler::shrl(Register dst) {
3354   int encode = prefix_and_encode(dst->encoding());
3355   emit_int8((unsigned char)0xD3);
3356   emit_int8((unsigned char)(0xE8 | encode));
3357 }
3358 
3359 // copies a single word from [esi] to [edi]
3360 void Assembler::smovl() {
3361   emit_int8((unsigned char)0xA5);
3362 }
3363 
3364 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
3365   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3366   if (VM_Version::supports_evex()) {
3367     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3368   } else {
3369     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3370   }
3371 }
3372 
3373 void Assembler::sqrtsd(XMMRegister dst, Address src) {
3374   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3375   if (VM_Version::supports_evex()) {
3376     tuple_type = EVEX_T1S;
3377     input_size_in_bits = EVEX_64bit;
3378     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3379   } else {
3380     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3381   }
3382 }
3383 
3384 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
3385   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3386   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3387 }
3388 
3389 void Assembler::std() {
3390   emit_int8((unsigned char)0xFD);
3391 }
3392 
3393 void Assembler::sqrtss(XMMRegister dst, Address src) {
3394   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3395   if (VM_Version::supports_evex()) {
3396     tuple_type = EVEX_T1S;
3397     input_size_in_bits = EVEX_32bit;
3398   }
3399   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3400 }
3401 
3402 void Assembler::stmxcsr( Address dst) {
3403   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3404   InstructionMark im(this);
3405   prefix(dst);
3406   emit_int8(0x0F);
3407   emit_int8((unsigned char)0xAE);
3408   emit_operand(as_Register(3), dst);
3409 }
3410 
3411 void Assembler::subl(Address dst, int32_t imm32) {
3412   InstructionMark im(this);
3413   prefix(dst);
3414   emit_arith_operand(0x81, rbp, dst, imm32);
3415 }
3416 
3417 void Assembler::subl(Address dst, Register src) {
3418   InstructionMark im(this);


3429 // Force generation of a 4-byte immediate value even if it fits in 8 bits
3430 void Assembler::subl_imm32(Register dst, int32_t imm32) {
3431   prefix(dst);
3432   emit_arith_imm32(0x81, 0xE8, dst, imm32);
3433 }
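// Illustrative note: because the _imm32 form always encodes 81 /5 id, the
// instruction length does not depend on the immediate value, which matters for
// call sites that patch the value afterwards. A hypothetical frame setup
// (framesize is an assumed variable, not taken from this file) might use
//   subl_imm32(rsp, framesize);   // typically 81 EC imm32
// whereas the plain subl form may shrink to the sign-extended 8-bit encoding.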
3434 
3435 void Assembler::subl(Register dst, Address src) {
3436   InstructionMark im(this);
3437   prefix(src, dst);
3438   emit_int8(0x2B);
3439   emit_operand(dst, src);
3440 }
3441 
3442 void Assembler::subl(Register dst, Register src) {
3443   (void) prefix_and_encode(dst->encoding(), src->encoding());
3444   emit_arith(0x2B, 0xC0, dst, src);
3445 }
3446 
3447 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
3448   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3449   if (VM_Version::supports_evex()) {
3450     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3451   } else {
3452     emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
3453   }
3454 }
3455 
3456 void Assembler::subsd(XMMRegister dst, Address src) {
3457   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3458   if (VM_Version::supports_evex()) {
3459     tuple_type = EVEX_T1S;
3460     input_size_in_bits = EVEX_64bit;
3461   }
3462   emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3463 }
3464 
3465 void Assembler::subss(XMMRegister dst, XMMRegister src) {
3466   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3467   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3468 }
3469 
3470 void Assembler::subss(XMMRegister dst, Address src) {
3471   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3472   if (VM_Version::supports_evex()) {
3473     tuple_type = EVEX_T1S;
3474     input_size_in_bits = EVEX_32bit;
3475   }
3476   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3477 }
3478 
3479 void Assembler::testb(Register dst, int imm8) {
3480   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
3481   (void) prefix_and_encode(dst->encoding(), true);
3482   emit_arith_b(0xF6, 0xC0, dst, imm8);
3483 }
3484 
3485 void Assembler::testl(Register dst, int32_t imm32) {
3486   // not using emit_arith because test
3487   // doesn't support sign-extension of
3488   // 8bit operands
3489   int encode = dst->encoding();
3490   if (encode == 0) {
3491     emit_int8((unsigned char)0xA9);
3492   } else {
3493     encode = prefix_and_encode(encode);
3494     emit_int8((unsigned char)0xF7);
3495     emit_int8((unsigned char)(0xC0 | encode));


3512 void Assembler::tzcntl(Register dst, Register src) {
3513   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3514   emit_int8((unsigned char)0xF3);
3515   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3516   emit_int8(0x0F);
3517   emit_int8((unsigned char)0xBC);
3518   emit_int8((unsigned char)0xC0 | encode);
3519 }
3520 
3521 void Assembler::tzcntq(Register dst, Register src) {
3522   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3523   emit_int8((unsigned char)0xF3);
3524   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3525   emit_int8(0x0F);
3526   emit_int8((unsigned char)0xBC);
3527   emit_int8((unsigned char)(0xC0 | encode));
3528 }
3529 
3530 void Assembler::ucomisd(XMMRegister dst, Address src) {
3531   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3532   if (VM_Version::supports_evex()) {
3533     tuple_type = EVEX_T1S;
3534     input_size_in_bits = EVEX_64bit;
3535     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
3536   } else {
3537     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3538   }
3539 }
3540 
3541 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
3542   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3543   if (VM_Version::supports_evex()) {
3544     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
3545   } else {
3546     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3547   }
3548 }
3549 
3550 void Assembler::ucomiss(XMMRegister dst, Address src) {
3551   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3552   if (VM_Version::supports_evex()) {
3553     tuple_type = EVEX_T1S;
3554     input_size_in_bits = EVEX_32bit;
3555   }
3556   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
3557 }
3558 
3559 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
3560   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3561   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
3562 }
3563 
3564 void Assembler::xabort(int8_t imm8) {
3565   emit_int8((unsigned char)0xC6);
3566   emit_int8((unsigned char)0xF8);
3567   emit_int8((unsigned char)(imm8 & 0xFF));
3568 }
3569 
3570 void Assembler::xaddl(Address dst, Register src) {
3571   InstructionMark im(this);
3572   prefix(dst, src);
3573   emit_int8(0x0F);
3574   emit_int8((unsigned char)0xC1);
3575   emit_operand(src, dst);
3576 }
3577 
3578 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3579   InstructionMark im(this);
3580   relocate(rtype);
3581   if (abort.is_bound()) {


3623   emit_arith(0x81, 0xF0, dst, imm32);
3624 }
3625 
3626 void Assembler::xorl(Register dst, Address src) {
3627   InstructionMark im(this);
3628   prefix(src, dst);
3629   emit_int8(0x33);
3630   emit_operand(dst, src);
3631 }
3632 
3633 void Assembler::xorl(Register dst, Register src) {
3634   (void) prefix_and_encode(dst->encoding(), src->encoding());
3635   emit_arith(0x33, 0xC0, dst, src);
3636 }
3637 
3638 
3639 // AVX 3-operand scalar floating-point arithmetic instructions
3640 
3641 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3642   assert(VM_Version::supports_avx(), "");
3643   if (VM_Version::supports_evex()) {
3644     tuple_type = EVEX_T1S;
3645     input_size_in_bits = EVEX_64bit;
3646     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3647   } else {
3648     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3649   }
3650 }
3651 
3652 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3653   assert(VM_Version::supports_avx(), "");
3654   if (VM_Version::supports_evex()) {
3655     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3656   } else {
3657     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3658   }
3659 }
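// Illustrative note (xmm1/xmm2/xmm3 are assumed example registers): in the AVX
// 3-operand scalar forms 'nds' supplies the pass-through upper bits, so the
// destination need not equal either source. A non-destructive add could be
//   vaddsd(xmm1, xmm2, xmm3);   // xmm1[63:0] = xmm2[63:0] + xmm3[63:0],
//                               // xmm1[127:64] copied from xmm2
// while the legacy addsd(dst, src) form always accumulates into dst.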
3660 
3661 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3662   assert(VM_Version::supports_avx(), "");
3663   if (VM_Version::supports_evex()) {
3664     tuple_type = EVEX_T1S;
3665     input_size_in_bits = EVEX_32bit;
3666   }
3667   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3668 }
3669 
3670 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3671   assert(VM_Version::supports_avx(), "");
3672   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3673 }
3674 
3675 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3676   assert(VM_Version::supports_avx(), "");
3677   if (VM_Version::supports_evex()) {
3678     tuple_type = EVEX_T1S;
3679     input_size_in_bits = EVEX_64bit;
3680     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3681   } else {
3682     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3683   }
3684 }
3685 
3686 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3687   assert(VM_Version::supports_avx(), "");
3688   if (VM_Version::supports_evex()) {
3689     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3690   } else {
3691     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3692   }
3693 }
3694 
3695 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3696   assert(VM_Version::supports_avx(), "");
3697   if (VM_Version::supports_evex()) {
3698     tuple_type = EVEX_T1S;
3699     input_size_in_bits = EVEX_32bit;
3700   }
3701   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3702 }
3703 
3704 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3705   assert(VM_Version::supports_avx(), "");
3706   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3707 }
3708 
3709 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3710   assert(VM_Version::supports_avx(), "");
3711   if (VM_Version::supports_evex()) {
3712     tuple_type = EVEX_T1S;
3713     input_size_in_bits = EVEX_64bit;
3714     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3715   } else {
3716     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3717   }
3718 }
3719 
3720 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3721   assert(VM_Version::supports_avx(), "");
3722   if (VM_Version::supports_evex()) {
3723     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3724   } else {
3725     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3726   }
3727 }
3728 
3729 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3730   assert(VM_Version::supports_avx(), "");
3731   if (VM_Version::supports_evex()) {
3732     tuple_type = EVEX_T1S;
3733     input_size_in_bits = EVEX_32bit;
3734   }
3735   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3736 }
3737 
3738 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3739   assert(VM_Version::supports_avx(), "");
3740   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3741 }
3742 
3743 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3744   assert(VM_Version::supports_avx(), "");
3745   if (VM_Version::supports_evex()) {
3746     tuple_type = EVEX_T1S;
3747     input_size_in_bits = EVEX_64bit;
3748     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3749   } else {
3750     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3751   }
3752 }
3753 
3754 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3755   assert(VM_Version::supports_avx(), "");
3756   if (VM_Version::supports_evex()) {
3757     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3758   } else {
3759     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3760   }
3761 }
3762 
3763 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
3764   assert(VM_Version::supports_avx(), "");
3765   if (VM_Version::supports_evex()) {
3766     tuple_type = EVEX_T1S;
3767     input_size_in_bits = EVEX_32bit;
3768   }
3769   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3770 }
3771 
3772 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3773   assert(VM_Version::supports_avx(), "");
3774   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3775 }
3776 
3777 //====================VECTOR ARITHMETIC=====================================
3778 
3779 // Floating-point vector arithmetic
3780 
3781 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
3782   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3783   if (VM_Version::supports_evex()) {
3784     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
3785   } else {
3786     emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
3787   }
3788 }
3789 
3790 void Assembler::addps(XMMRegister dst, XMMRegister src) {
3791   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3792   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
3793 }
3794 
3795 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3796   assert(VM_Version::supports_avx(), "");
3797   if (VM_Version::supports_evex()) {
3798     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3799   } else {
3800     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3801   }
3802 }
3803 
3804 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3805   assert(VM_Version::supports_avx(), "");
3806   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
3807 }
3808 
3809 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3810   assert(VM_Version::supports_avx(), "");
3811   if (VM_Version::supports_evex()) {
3812     tuple_type = EVEX_FV;
3813     input_size_in_bits = EVEX_64bit;
3814     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3815   } else {
3816     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3817   }
3818 }
3819 
3820 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3821   assert(VM_Version::supports_avx(), "");
3822   if (VM_Version::supports_evex()) {
3823     tuple_type = EVEX_FV;
3824     input_size_in_bits = EVEX_32bit;
3825   }
3826   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
3827 }
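// Illustrative note (an assumption about how these globals are consumed later
// in this file): tuple_type and input_size_in_bits feed the EVEX prefix
// emitter's compressed-displacement (disp8*N) logic. With EVEX_FV and a 32-bit
// element size, an 8-bit displacement in the Address operand is scaled by the
// vector width, so e.g. a 512-bit operand at [base + 64] can still be encoded
// with a one-byte displacement.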
3828 
3829 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
3830   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3831   if (VM_Version::supports_evex()) {
3832     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
3833   } else {
3834     emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
3835   }
3836 }
3837 
3838 void Assembler::subps(XMMRegister dst, XMMRegister src) {
3839   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3840   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
3841 }
3842 
3843 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3844   assert(VM_Version::supports_avx(), "");
3845   if (VM_Version::supports_evex()) {
3846     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3847   } else {
3848     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3849   }
3850 }
3851 
3852 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3853   assert(VM_Version::supports_avx(), "");
3854   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
3855 }
3856 
3857 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3858   assert(VM_Version::supports_avx(), "");
3859   if (VM_Version::supports_evex()) {
3860     tuple_type = EVEX_FV;
3861     input_size_in_bits = EVEX_64bit;
3862     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3863   } else {
3864     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3865   }
3866 }
3867 
3868 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3869   assert(VM_Version::supports_avx(), "");
3870   if (VM_Version::supports_evex()) {
3871     tuple_type = EVEX_FV;
3872     input_size_in_bits = EVEX_32bit;
3873   }
3874   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
3875 }
3876 
3877 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3878   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3879   if (VM_Version::supports_evex()) {
3880     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
3881   } else {
3882     emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
3883   }
3884 }
3885 
3886 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3887   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3888   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3889 }
3890 
3891 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3892   assert(VM_Version::supports_avx(), "");
3893   if (VM_Version::supports_evex()) {
3894     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3895   } else {
3896     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3897   }
3898 }
3899 
3900 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3901   assert(VM_Version::supports_avx(), "");
3902   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
3903 }
3904 
3905 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3906   assert(VM_Version::supports_avx(), "");
3907   if (VM_Version::supports_evex()) {
3908     tuple_type = EVEX_FV;
3909     input_size_in_bits = EVEX_64bit;
3910     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3911   } else {
3912     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3913   }
3914 }
3915 
3916 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3917   assert(VM_Version::supports_avx(), "");
3918   if (VM_Version::supports_evex()) {
3919     tuple_type = EVEX_FV;
3920     input_size_in_bits = EVEX_32bit;
3921   }
3922   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
3923 }
3924 
3925 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
3926   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3927   if (VM_Version::supports_evex()) {
3928     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
3929   } else {
3930     emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
3931   }
3932 }
3933 
3934 void Assembler::divps(XMMRegister dst, XMMRegister src) {
3935   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3936   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
3937 }
3938 
3939 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3940   assert(VM_Version::supports_avx(), "");
3941   if (VM_Version::supports_evex()) {
3942     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
3943   } else {
3944     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
3945   }
3946 }
3947 
3948 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3949   assert(VM_Version::supports_avx(), "");
3950   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
3951 }
3952 
3953 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3954   assert(VM_Version::supports_avx(), "");
3955   if (VM_Version::supports_evex()) {
3956     tuple_type = EVEX_FV;
3957     input_size_in_bits = EVEX_64bit;
3958     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
3959   } else {
3960     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
3961   }
3962 }
3963 
3964 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3965   assert(VM_Version::supports_avx(), "");
3966   if (VM_Version::supports_evex()) {
3967     tuple_type = EVEX_FV;
3968     input_size_in_bits = EVEX_32bit;
3969   }
3970   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
3971 }
3972 
3973 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
3974   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3975   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
3976     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
3977   } else {
3978     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true);
3979   }
3980 }
3981 
3982 void Assembler::andps(XMMRegister dst, XMMRegister src) {
3983   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3984   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, false,
3985                   (VM_Version::supports_avx512dq() == false));
3986 }
3987 
3988 void Assembler::andps(XMMRegister dst, Address src) {
3989   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3990   if (VM_Version::supports_evex()) {
3991     tuple_type = EVEX_FV;
3992     input_size_in_bits = EVEX_32bit;
3993   }
3994   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE,
3995                   false, (VM_Version::supports_avx512dq() == false));
3996 }
3997 
3998 void Assembler::andpd(XMMRegister dst, Address src) {
3999   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4000   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4001     tuple_type = EVEX_FV;
4002     input_size_in_bits = EVEX_64bit;
4003     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
4004   } else {
4005     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true);
4006   }
4007 }
4008 
4009 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4010   assert(VM_Version::supports_avx(), "");
4011   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4012     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4013   } else {
4014     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true);
4015   }
4016 }
4017 
4018 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4019   assert(VM_Version::supports_avx(), "");
4020   bool legacy_mode = (VM_Version::supports_avx512dq() == false);
4021   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, legacy_mode);
4022 }
4023 
4024 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4025   assert(VM_Version::supports_avx(), "");
4026   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4027     tuple_type = EVEX_FV;
4028     input_size_in_bits = EVEX_64bit;
4029     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4030   } else {
4031     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true);
4032   }
4033 }
4034 
4035 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4036   assert(VM_Version::supports_avx(), "");
4037   if (VM_Version::supports_evex()) {
4038     tuple_type = EVEX_FV;
4039     input_size_in_bits = EVEX_32bit;
4040   }
4041   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len,
4042                  (VM_Version::supports_avx512dq() == false));
4043 }
4044 
4045 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
4046   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4047   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4048     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4049   } else {
4050     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true);
4051   }
4052 }
4053 
4054 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
4055   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4056   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE,
4057                   false, (VM_Version::supports_avx512dq() == false));
4058 }
4059 
4060 void Assembler::xorpd(XMMRegister dst, Address src) {
4061   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4062   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4063     tuple_type = EVEX_FV;
4064     input_size_in_bits = EVEX_64bit;
4065     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4066   } else {
4067     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true);
4068   }
4069 }
4070 
4071 void Assembler::xorps(XMMRegister dst, Address src) {
4072   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4073   if (VM_Version::supports_evex()) {
4074     tuple_type = EVEX_FV;
4075     input_size_in_bits = EVEX_32bit;
4076   }
4077   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, false,
4078                   (VM_Version::supports_avx512dq() == false));
4079 }
4080 
4081 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4082   assert(VM_Version::supports_avx(), "");
4083   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4084     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4085   } else {
4086     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true);
4087   }
4088 }
4089 
4090 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4091   assert(VM_Version::supports_avx(), "");
4092   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len,
4093                  (VM_Version::supports_avx512dq() == false));
4094 }
4095 
4096 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4097   assert(VM_Version::supports_avx(), "");
4098   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4099     tuple_type = EVEX_FV;
4100     input_size_in_bits = EVEX_64bit;
4101     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4102   } else {
4103     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true);
4104   }
4105 }
4106 
4107 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4108   assert(VM_Version::supports_avx(), "");
4109   if (VM_Version::supports_evex()) {
4110     tuple_type = EVEX_FV;
4111     input_size_in_bits = EVEX_32bit;
4112   }
4113   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len,
4114                  (VM_Version::supports_avx512dq() == false));
4115 }
4116 

4117 // Integer vector arithmetic
4118 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4119   assert(VM_Version::supports_avx() && (vector_len == 0) ||
4120          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
4121   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len,
4122                                      VEX_OPCODE_0F_38, true, false);
4123   emit_int8(0x01);
4124   emit_int8((unsigned char)(0xC0 | encode));
4125 }
4126 
4127 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4128   assert(VM_Version::supports_avx() && (vector_len == 0) ||
4129          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
4130   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len,
4131                                      VEX_OPCODE_0F_38, true, false);
4132   emit_int8(0x02);
4133   emit_int8((unsigned char)(0xC0 | encode));
4134 }
4135 
4136 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
4137   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4138   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
4139 }
4140 
4141 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
4142   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4143   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
4144 }
4145 
4146 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
4147   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4148   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
4149 }
4150 
4151 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
4152   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4153   if (VM_Version::supports_evex()) {
4154     emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
4155   } else {
4156     emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
4157   }
4158 }
4159 
4160 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
4161   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4162   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
4163                                       VEX_OPCODE_0F_38, false, AVX_128bit, true);
4164   emit_int8(0x01);
4165   emit_int8((unsigned char)(0xC0 | encode));
4166 }
4167 
4168 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
4169   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4170   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
4171                                       VEX_OPCODE_0F_38, false, AVX_128bit, true);
4172   emit_int8(0x02);
4173   emit_int8((unsigned char)(0xC0 | encode));
4174 }
4175 
4176 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4177   assert(UseAVX > 0, "requires some form of AVX");
4178   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len,
4179                  (VM_Version::supports_avx512bw() == false));
4180 }
4181 
4182 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4183   assert(UseAVX > 0, "requires some form of AVX");
4184   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len,
4185                  (VM_Version::supports_avx512bw() == false));
4186 }
4187 
4188 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4189   assert(UseAVX > 0, "requires some form of AVX");
4190   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4191 }
4192 
4193 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4194   assert(UseAVX > 0, "requires some form of AVX");
4195   if (VM_Version::supports_evex()) {
4196     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4197   } else {
4198     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4199   }
4200 }
4201 
4202 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4203   assert(UseAVX > 0, "requires some form of AVX");
4204   if (VM_Version::supports_evex()) {
4205     tuple_type = EVEX_FVM;
4206   }
4207   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len);
4208 }
4209 
4210 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4211   assert(UseAVX > 0, "requires some form of AVX");
4212   if (VM_Version::supports_evex()) {
4213     tuple_type = EVEX_FVM;
4214   }
4215   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len);
4216 }
4217 
4218 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4219   assert(UseAVX > 0, "requires some form of AVX");
4220   if (VM_Version::supports_evex()) {
4221     tuple_type = EVEX_FV;
4222     input_size_in_bits = EVEX_32bit;
4223   }
4224   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4225 }
4226 
4227 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4228   assert(UseAVX > 0, "requires some form of AVX");
4229   if (VM_Version::supports_evex()) {
4230     tuple_type = EVEX_FV;
4231     input_size_in_bits = EVEX_64bit;
4232     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4233   } else {
4234     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4235   }
4236 }
4237 
4238 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
4239   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4240   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
4241 }
4242 
4243 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
4244   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4245   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
4246 }
4247 
4248 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
4249   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4250   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
4251 }
4252 
4253 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
4254   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4255   if (VM_Version::supports_evex()) {
4256     emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
4257   } else {
4258     emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
4259   }
4260 }
4261 
4262 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4263   assert(UseAVX > 0, "requires some form of AVX");
4264   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len,
4265                  (VM_Version::supports_avx512bw() == false));
4266 }
4267 
4268 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4269   assert(UseAVX > 0, "requires some form of AVX");
4270   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len,
4271                  (VM_Version::supports_avx512bw() == false));
4272 }
4273 
4274 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4275   assert(UseAVX > 0, "requires some form of AVX");
4276   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4277 }
4278 
4279 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4280   assert(UseAVX > 0, "requires some form of AVX");
4281   if (VM_Version::supports_evex()) {
4282     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4283   } else {
4284     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4285   }
4286 }
4287 
4288 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4289   assert(UseAVX > 0, "requires some form of AVX");
4290   if (VM_Version::supports_evex()) {
4291     tuple_type = EVEX_FVM;
4292   }
4293   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len,
4294                  (VM_Version::supports_avx512bw() == false));
4295 }
4296 
4297 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4298   assert(UseAVX > 0, "requires some form of AVX");
4299   if (VM_Version::supports_evex()) {
4300     tuple_type = EVEX_FVM;
4301   }
4302   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len,
4303                  (VM_Version::supports_avx512bw() == false));
4304 }
4305 
4306 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4307   assert(UseAVX > 0, "requires some form of AVX");
4308   if (VM_Version::supports_evex()) {
4309     tuple_type = EVEX_FV;
4310     input_size_in_bits = EVEX_32bit;
4311   }
4312   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4313 }
4314 
4315 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4316   assert(UseAVX > 0, "requires some form of AVX");
4317   if (VM_Version::supports_evex()) {
4318     tuple_type = EVEX_FV;
4319     input_size_in_bits = EVEX_64bit;
4320     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4321   } else {
4322     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4323   }
4324 }
4325 
4326 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
4327   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4328   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66,
4329                   (VM_Version::supports_avx512bw() == false));
4330 }
4331 
4332 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
4333   assert(VM_Version::supports_sse4_1(), "");
4334   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
4335                                       false, VEX_OPCODE_0F_38);
4336   emit_int8(0x40);
4337   emit_int8((unsigned char)(0xC0 | encode));
4338 }
4339 
4340 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4341   assert(UseAVX > 0, "requires some form of AVX");
4342   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len,
4343                  (VM_Version::supports_avx512bw() == false));
4344 }
4345 
4346 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4347   assert(UseAVX > 0, "requires some form of AVX");
4348   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
4349                                      vector_len, VEX_OPCODE_0F_38);
4350   emit_int8(0x40);
4351   emit_int8((unsigned char)(0xC0 | encode));
4352 }
4353 
4354 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4355   assert(UseAVX > 2, "requires EVEX support (AVX-512)");
4356   int src_enc = src->encoding();
4357   int dst_enc = dst->encoding();
4358   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4359   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66,
4360                                      VEX_OPCODE_0F_38, true, vector_len, false, false);
4361   emit_int8(0x40);
4362   emit_int8((unsigned char)(0xC0 | encode));
4363 }
4364 
4365 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4366   assert(UseAVX > 0, "requires some form of AVX");
4367   if (VM_Version::supports_evex()) {
4368     tuple_type = EVEX_FVM;
4369   }
4370   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len);
4371 }
4372 
4373 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4374   assert(UseAVX > 0, "requires some form of AVX");
4375   if (VM_Version::supports_evex()) {
4376     tuple_type = EVEX_FV;
4377     input_size_in_bits = EVEX_32bit;
4378   }
4379   InstructionMark im(this);
4380   int dst_enc = dst->encoding();
4381   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4382   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
4383              VEX_OPCODE_0F_38, false, vector_len);
4384   emit_int8(0x40);
4385   emit_operand(dst, src);
4386 }
4387 
4388 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4389   assert(UseAVX > 0, "requires some form of AVX");
4390   if (VM_Version::supports_evex()) {
4391     tuple_type = EVEX_FV;
4392     input_size_in_bits = EVEX_64bit;
4393   }
4394   InstructionMark im(this);
4395   int dst_enc = dst->encoding();
4396   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4397   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len);
4398   emit_int8(0x40);
4399   emit_operand(dst, src);
4400 }
4401 
4402 // Shift packed integers left by specified number of bits.
4403 void Assembler::psllw(XMMRegister dst, int shift) {
4404   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4405   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4406   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
4407                                       false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
4408   emit_int8(0x71);
4409   emit_int8((unsigned char)(0xC0 | encode));
4410   emit_int8(shift & 0xFF);
4411 }
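// Illustrative note: for these immediate shift forms the opcode extension
// (/2, /4, /6) travels in the ModRM reg field, which is why xmm2, xmm4 or xmm6
// is passed as the first register above -- it is never read or written, its
// encoding merely selects the shift variant. For example (xmm1 and the shift
// count are assumed example values),
//   psllw(xmm1, 3);   // plain SSE encoding: 66 0F 71 F1 03 (ModRM F1 = /6, xmm1)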
4412 
4413 void Assembler::pslld(XMMRegister dst, int shift) {
4414   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4415   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4416   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false);
4417   emit_int8(0x72);
4418   emit_int8((unsigned char)(0xC0 | encode));
4419   emit_int8(shift & 0xFF);
4420 }
4421 
4422 void Assembler::psllq(XMMRegister dst, int shift) {
4423   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4424   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4425   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
4426   emit_int8(0x73);
4427   emit_int8((unsigned char)(0xC0 | encode));
4428   emit_int8(shift & 0xFF);
4429 }
4430 
4431 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
4432   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4433   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, false,
4434                   (VM_Version::supports_avx512bw() == false));
4435 }
4436 
4437 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
4438   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4439   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
4440 }
4441 
4442 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
4443   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4444   if (VM_Version::supports_evex()) {
4445     emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
4446   } else {
4447     emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
4448   }
4449 }
4450 
4451 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4452   assert(UseAVX > 0, "requires some form of AVX");
4453   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4454   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len,
4455                  (VM_Version::supports_avx512bw() == false));
4456   emit_int8(shift & 0xFF);
4457 }
4458 
4459 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4460   assert(UseAVX > 0, "requires some form of AVX");
4461   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4462   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
4463   emit_int8(shift & 0xFF);
4464 }
4465 
4466 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4467   assert(UseAVX > 0, "requires some form of AVX");
4468   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4469   if (VM_Version::supports_evex()) {
4470     emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4471   } else {
4472     emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4473   }
4474   emit_int8(shift & 0xFF);
4475 }
4476 
4477 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4478   assert(UseAVX > 0, "requires some form of AVX");
4479   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len,
4480                  (VM_Version::supports_avx512bw() == false));
4481 }
4482 
4483 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4484   assert(UseAVX > 0, "requires some form of AVX");
4485   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
4486 }
4487 
4488 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4489   assert(UseAVX > 0, "requires some form of AVX");
4490   if (VM_Version::supports_evex()) {
4491     emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4492   } else {
4493     emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4494   }
4495 }
4496 
4497 // Shift packed integers logically right by specified number of bits.
4498 void Assembler::psrlw(XMMRegister dst, int shift) {
4499   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4500   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4501   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
4502                                       (VM_Version::supports_avx512bw() == false));
4503   emit_int8(0x71);
4504   emit_int8((unsigned char)(0xC0 | encode));
4505   emit_int8(shift & 0xFF);
4506 }
4507 
4508 void Assembler::psrld(XMMRegister dst, int shift) {
4509   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4510   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4511   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false);
4512   emit_int8(0x72);
4513   emit_int8((unsigned char)(0xC0 | encode));
4514   emit_int8(shift & 0xFF);
4515 }
4516 
4517 void Assembler::psrlq(XMMRegister dst, int shift) {
4518   // Do not confuse it with the psrldq SSE2 instruction, which
4519   // shifts the 128-bit value in an xmm register by a number of bytes.
4520   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4521   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4522   int encode = 0;
4523   if (VM_Version::supports_evex() && VM_Version::supports_avx512bw()) {
4524     encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false);
4525   } else {
4526     encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
4527   }
4528   emit_int8(0x73);
4529   emit_int8((unsigned char)(0xC0 | encode));
4530   emit_int8(shift & 0xFF);
4531 }
4532 
4533 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
4534   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4535   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, false,
4536                   (VM_Version::supports_avx512bw() == false));
4537 }
4538 
4539 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
4540   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4541   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
4542 }
4543 
4544 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
4545   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4546   if (VM_Version::supports_evex()) {
4547     emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
4548   } else {
4549     emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
4550   }
4551 }
4552 
4553 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4554   assert(UseAVX > 0, "requires some form of AVX");
4555   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4556   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len,
4557                  (VM_Version::supports_avx512bw() == false));
4558   emit_int8(shift & 0xFF);
4559 }
4560 
4561 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4562   assert(UseAVX > 0, "requires some form of AVX");
4563   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4564   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
4565   emit_int8(shift & 0xFF);
4566 }
4567 
4568 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4569   assert(UseAVX > 0, "requires some form of AVX");
4570   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4571   if (VM_Version::supports_evex()) {
4572     emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4573   } else {
4574     emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4575   }
4576   emit_int8(shift & 0xFF);
4577 }
4578 
4579 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4580   assert(UseAVX > 0, "requires some form of AVX");
4581   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len,
4582                  (VM_Version::supports_avx512bw() == false));
4583 }
4584 
4585 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4586   assert(UseAVX > 0, "requires some form of AVX");
4587   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
4588 }
4589 
4590 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4591   assert(UseAVX > 0, "requires some form of AVX");
4592   if (VM_Version::supports_evex()) {
4593     emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4594   } else {
4595     emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4596   }
4597 }
4598 
4599 // Shift packed integers arithmetically right by specified number of bits.
4600 void Assembler::psraw(XMMRegister dst, int shift) {
4601   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4602   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4603   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
4604                                       (VM_Version::supports_avx512bw() == false));
4605   emit_int8(0x71);
4606   emit_int8((unsigned char)(0xC0 | encode));
4607   emit_int8(shift & 0xFF);
4608 }
4609 
4610 void Assembler::psrad(XMMRegister dst, int shift) {
4611   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4612   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4613   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false);
4614   emit_int8(0x72);
4615   emit_int8((unsigned char)(0xC0 | encode));
4616   emit_int8(shift & 0xFF);
4617 }
4618 
4619 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
4620   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4621   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66,
4622                   (VM_Version::supports_avx512bw() == false));
4623 }
4624 
4625 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
4626   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4627   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
4628 }
4629 
4630 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4631   assert(UseAVX > 0, "requires some form of AVX");
4632   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4633   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len,
4634                  (VM_Version::supports_avx512bw() == false));
4635   emit_int8(shift & 0xFF);
4636 }
4637 
4638 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4639   assert(UseAVX > 0, "requires some form of AVX");
4640   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4641   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
4642   emit_int8(shift & 0xFF);
4643 }
4644 
4645 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4646   assert(UseAVX > 0, "requires some form of AVX");
4647   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len,
4648                  (VM_Version::supports_avx512bw() == false));
4649 }
4650 
4651 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4652   assert(UseAVX > 0, "requires some form of AVX");
4653   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
4654 }
4655 
4656 
4657 // AND packed integers
4658 void Assembler::pand(XMMRegister dst, XMMRegister src) {
4659   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4660   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
4661 }
4662 
4663 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4664   assert(UseAVX > 0, "requires some form of AVX");
4665   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4666 }
4667 
4668 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4669   assert(UseAVX > 0, "requires some form of AVX");
4670   if (VM_Version::supports_evex()) {
4671     tuple_type = EVEX_FV;
4672     input_size_in_bits = EVEX_32bit;
4673   }
4674   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4675 }
4676 
4677 void Assembler::por(XMMRegister dst, XMMRegister src) {
4678   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4679   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
4680 }
4681 
4682 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4683   assert(UseAVX > 0, "requires some form of AVX");
4684   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
4685 }
4686 
4687 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4688   assert(UseAVX > 0, "requires some form of AVX");
4689   if (VM_Version::supports_evex()) {
4690     tuple_type = EVEX_FV;
4691     input_size_in_bits = EVEX_32bit;
4692   }
4693   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
4694 }
4695 
4696 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
4697   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4698   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
4699 }
4700 
4701 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4702   assert(UseAVX > 0, "requires some form of AVX");
4703   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
4704 }
4705 
4706 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4707   assert(UseAVX > 0, "requires some form of AVX");
4708   if (VM_Version::supports_evex()) {
4709     tuple_type = EVEX_FV;
4710     input_size_in_bits = EVEX_32bit;
4711   }
4712   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
4713 }
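     // Usage sketch (illustrative): pxor(dst, dst) is the usual idiom for zeroing an
     // XMM register, and vpxor(dst, dst, dst, vector_len) does the same for the wider
     // AVX/EVEX registers without loading a constant.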
4714 
4715 
4716 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4717   assert(VM_Version::supports_avx(), "");
4718   int vector_len = AVX_256bit;
4719   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4720   emit_int8(0x18);
4721   emit_int8((unsigned char)(0xC0 | encode));
4722   // 0x00 - insert into lower 128 bits
4723   // 0x01 - insert into upper 128 bits
4724   emit_int8(0x01);
4725 }
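     // Usage sketch (illustrative): with the low 128 bits already in dst,
     //   vinsertf128h(dst, dst, src_hi);
     // assembles a 256-bit value by placing src_hi into bits 255:128 (imm8 is
     // hard-coded to 0x01 above).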
4726 
4727 void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4728   assert(VM_Version::supports_evex(), "");
4729   int vector_len = AVX_512bit;
4730   int src_enc = src->encoding();
4731   int dst_enc = dst->encoding();
4732   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4733   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66,
4734                                      VEX_OPCODE_0F_3A, true, vector_len, false, false);
4735   emit_int8(0x1A);
4736   emit_int8((unsigned char)(0xC0 | encode));
4737   // 0x00 - insert into lower 256 bits
4738   // 0x01 - insert into upper 256 bits
4739   emit_int8(0x01);
4740 }
4741 
4742 void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
4743   assert(VM_Version::supports_evex(), "");
4744   if (VM_Version::supports_evex()) {
4745     tuple_type = EVEX_T4;
4746     input_size_in_bits = EVEX_64bit;
4747   }
4748   InstructionMark im(this);
4749   int vector_len = AVX_512bit;
4750   assert(dst != xnoreg, "sanity");
4751   int dst_enc = dst->encoding();
4752   // swap src<->dst for encoding
4753   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len);
4754   emit_int8(0x1A);
4755   emit_operand(dst, src);
4756   // 0x01 - insert into upper 256 bits
4757   emit_int8(0x01);
4758 }
4759 
4760 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
4761   assert(VM_Version::supports_avx(), "");
4762   if (VM_Version::supports_evex()) {
4763     tuple_type = EVEX_T4;
4764     input_size_in_bits = EVEX_32bit;
4765   }
4766   InstructionMark im(this);
4767   int vector_len = AVX_256bit;
4768   assert(dst != xnoreg, "sanity");
4769   int dst_enc = dst->encoding();
4770   // swap src<->dst for encoding
4771   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4772   emit_int8(0x18);
4773   emit_operand(dst, src);
4774   // 0x01 - insert into upper 128 bits
4775   emit_int8(0x01);
4776 }
4777 
4778 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
4779   assert(VM_Version::supports_avx(), "");
4780   int vector_len = AVX_256bit;
4781   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4782   emit_int8(0x19);
4783   emit_int8((unsigned char)(0xC0 | encode));
4784   // 0x00 - extract from lower 128 bits
4785   // 0x01 - extract from upper 128 bits
4786   emit_int8(0x01);
4787 }
4788 
4789 void Assembler::vextractf128h(Address dst, XMMRegister src) {
4790   assert(VM_Version::supports_avx(), "");
4791   if (VM_Version::supports_evex()) {
4792     tuple_type = EVEX_T4;
4793     input_size_in_bits = EVEX_32bit;
4794   }
4795   InstructionMark im(this);
4796   int vector_len = AVX_256bit;
4797   assert(src != xnoreg, "sanity");
4798   int src_enc = src->encoding();
4799   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4800   emit_int8(0x19);
4801   emit_operand(src, dst);
4802   // 0x01 - extract from upper 128 bits
4803   emit_int8(0x01);
4804 }
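     // Usage sketch (illustrative): the Address forms pair naturally for spilling and
     // reloading the upper 128 bits of a YMM register, e.g.
     //   vextractf128h(Address(rsp, off), xmm0);   // spill bits 255:128
     //   vinsertf128h(xmm0, Address(rsp, off));    // reload them
     // where off is a caller-chosen stack offset.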
4805 
4806 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4807   assert(VM_Version::supports_avx2(), "");
4808   int vector_len = AVX_256bit;
4809   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4810   emit_int8(0x38);
4811   emit_int8((unsigned char)(0xC0 | encode));
4812   // 0x00 - insert into lower 128 bits
4813   // 0x01 - insert into upper 128 bits
4814   emit_int8(0x01);
4815 }
4816 
4817 void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4818   assert(VM_Version::supports_evex(), "");
4819   int vector_len = AVX_512bit;
4820   int src_enc = src->encoding();
4821   int dst_enc = dst->encoding();
4822   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4823   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
4824                                      VM_Version::supports_avx512dq(), vector_len, false, false);
4825   emit_int8(0x3A);
4826   emit_int8((unsigned char)(0xC0 | encode));
4827   // 0x00 - insert into lower 256 bits
4828   // 0x01 - insert into upper 256 bits
4829   emit_int8(0x01);
4830 }
4831 
4832 void Assembler::vinserti128h(XMMRegister dst, Address src) {
4833   assert(VM_Version::supports_avx2(), "");
4834   if (VM_Version::supports_evex()) {
4835     tuple_type = EVEX_T4;
4836     input_size_in_bits = EVEX_32bit;
4837   }
4838   InstructionMark im(this);
4839   int vector_len = AVX_256bit;
4840   assert(dst != xnoreg, "sanity");
4841   int dst_enc = dst->encoding();
4842   // swap src<->dst for encoding
4843   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4844   emit_int8(0x38);
4845   emit_operand(dst, src);
4846   // 0x01 - insert into upper 128 bits
4847   emit_int8(0x01);
4848 }
4849 
4850 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
4851   assert(VM_Version::supports_avx2(), "");
4852   int vector_len = AVX_256bit;
4853   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4854   emit_int8(0x39);
4855   emit_int8((unsigned char)(0xC0 | encode));
4856   // 0x00 - extract from lower 128 bits
4857   // 0x01 - extract from upper 128 bits
4858   emit_int8(0x01);
4859 }
4860 
4861 void Assembler::vextracti128h(Address dst, XMMRegister src) {
4862   assert(VM_Version::supports_avx2(), "");
4863   if (VM_Version::supports_evex()) {
4864     tuple_type = EVEX_T4;
4865     input_size_in_bits = EVEX_32bit;
4866   }
4867   InstructionMark im(this);
4868   int vector_len = AVX_256bit;
4869   assert(src != xnoreg, "sanity");
4870   int src_enc = src->encoding();
4871   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4872   emit_int8(0x39);
4873   emit_operand(src, dst);
4874   // 0x01 - extract from upper 128 bits
4875   emit_int8(0x01);
4876 }
4877 
4878 void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
4879   assert(VM_Version::supports_evex(), "");
4880   int vector_len = AVX_512bit;
4881   int src_enc = src->encoding();
4882   int dst_enc = dst->encoding();
4883   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
4884                                      true, vector_len, false, false);
4885   emit_int8(0x3B);
4886   emit_int8((unsigned char)(0xC0 | encode));
4887   // 0x01 - extract from upper 256 bits
4888   emit_int8(0x01);
4889 }
4890 
4891 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
4892   assert(VM_Version::supports_evex(), "");
4893   int vector_len = AVX_512bit;
4894   int src_enc = src->encoding();
4895   int dst_enc = dst->encoding();
4896   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
4897                                      VM_Version::supports_avx512dq(), vector_len, false, false);
4898   emit_int8(0x39);
4899   emit_int8((unsigned char)(0xC0 | encode));
4900   // 0x01 - extract from bits 255:128
4901   // 0x02 - extract from bits 383:256
4902   // 0x03 - extract from bits 511:384
4903   emit_int8(value & 0x3);
4904 }
4905 
4906 void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
4907   assert(VM_Version::supports_evex(), "");
4908   int vector_len = AVX_512bit;
4909   int src_enc = src->encoding();
4910   int dst_enc = dst->encoding();
4911   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
4912                                      VM_Version::supports_avx512dq(), vector_len, false, false);
4913   emit_int8(0x1B);
4914   emit_int8((unsigned char)(0xC0 | encode));
4915   // 0x01 - extract from upper 256 bits
4916   emit_int8(0x01);
4917 }
4918 
4919 void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
4920   assert(VM_Version::supports_evex(), "");
4921   tuple_type = EVEX_T4;
4922   input_size_in_bits = EVEX_64bit;
4923   InstructionMark im(this);
4924   int vector_len = AVX_512bit;
4925   assert(src != xnoreg, "sanity");
4926   int src_enc = src->encoding();
4927   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
4928              VM_Version::supports_avx512dq(), vector_len);
4929   emit_int8(0x1B);
4930   emit_operand(src, dst);
4931   // 0x01 - extract from upper 256 bits
4932   emit_int8(0x01);
4933 }
4934 
4935 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
4936   assert(VM_Version::supports_evex(), "");
4937   int vector_len = AVX_512bit;
4938   int src_enc = src->encoding();
4939   int dst_enc = dst->encoding();
4940   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66,
4941                                      VEX_OPCODE_0F_3A, false, vector_len, false, false);
4942   emit_int8(0x19);
4943   emit_int8((unsigned char)(0xC0 | encode));
4944   // 0x01 - extract from bits 255:128
4945   // 0x02 - extract from bits 383:256
4946   // 0x03 - extract from bits 511:384
4947   emit_int8(value & 0x3);
4948 }
4949 
4950 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
4951   assert(VM_Version::supports_evex(), "");
4952   int vector_len = AVX_512bit;
4953   int src_enc = src->encoding();
4954   int dst_enc = dst->encoding();
4955   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
4956                                      VM_Version::supports_avx512dq(), vector_len, false, false);
4957   emit_int8(0x19);
4958   emit_int8((unsigned char)(0xC0 | encode));
4959   // 0x01 - extract from bits 255:128
4960   // 0x02 - extract from bits 383:256
4961   // 0x03 - extract from bits 511:384
4962   emit_int8(value & 0x3);
4963 }
4964 
4965 // duplicate the 4-byte integer in src into the 8 dword locations of dest
4966 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
4967   assert(VM_Version::supports_avx2(), "");
4968   int vector_len = AVX_256bit;
4969   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
4970                                      vector_len, VEX_OPCODE_0F_38, false);
4971   emit_int8(0x58);
4972   emit_int8((unsigned char)(0xC0 | encode));
4973 }
4974 
4975 // duplicate the 4-byte integer in src into all dword locations of dest (4, 8 or 16, depending on vector_len)
4976 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
4977   assert(VM_Version::supports_evex(), "");
4978   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
4979                                      vector_len, VEX_OPCODE_0F_38, false);
4980   emit_int8(0x58);
4981   emit_int8((unsigned char)(0xC0 | encode));
4982 }
4983 
4984 // Carry-Less Multiplication Quadword
4985 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
4986   assert(VM_Version::supports_clmul(), "");
4987   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
4988                                       VEX_OPCODE_0F_3A, false, AVX_128bit, true);
4989   emit_int8(0x44);
4990   emit_int8((unsigned char)(0xC0 | encode));
4991   emit_int8((unsigned char)mask);
4992 }
4993 
4994 // Carry-Less Multiplication Quadword
4995 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
4996   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
4997   int vector_len = AVX_128bit;
4998   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
4999                                      vector_len, VEX_OPCODE_0F_3A, true);
5000   emit_int8(0x44);
5001   emit_int8((unsigned char)(0xC0 | encode));
5002   emit_int8((unsigned char)mask);
5003 }
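     // Immediate note (per the Intel SDM): for pclmulqdq/vpclmulqdq, bit 0 of the mask
     // selects the low (0) or high (1) quadword of the first source and bit 4 selects
     // the quadword of the second source, so the useful masks are 0x00, 0x01, 0x10 and 0x11.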
5004 
5005 void Assembler::vzeroupper() {
5006   assert(VM_Version::supports_avx(), "");
5007   if (UseAVX < 3)
5008   {
5009     (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
5010     emit_int8(0x77);
5011   }
5012 }
5013 
5014 
5015 #ifndef _LP64
5016 // 32bit only pieces of the assembler
5017 
5018 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
5019   // NO PREFIX AS NEVER 64BIT
5020   InstructionMark im(this);
5021   emit_int8((unsigned char)0x81);
5022   emit_int8((unsigned char)(0xF8 | src1->encoding()));
5023   emit_data(imm32, rspec, 0);
5024 }
5025 
5026 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
5027   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
5028   InstructionMark im(this);
5029   emit_int8((unsigned char)0x81);
5030   emit_operand(rdi, src1);
5031   emit_data(imm32, rspec, 0);


5508   }
5509 }
5510 
5511 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
5512   if (pre > 0) {
5513     emit_int8(simd_pre[pre]);
5514   }
5515   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
5516                           prefix_and_encode(dst_enc, src_enc);
5517   if (opc > 0) {
5518     emit_int8(0x0F);
5519     int opc2 = simd_opc[opc];
5520     if (opc2 > 0) {
5521       emit_int8(opc2);
5522     }
5523   }
5524   return encode;
5525 }
5526 
5527 
5528 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
5529   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
5530     prefix(VEX_3bytes);
5531 
5532     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
5533     byte1 = (~byte1) & 0xE0;
5534     byte1 |= opc;
5535     emit_int8(byte1);
5536 
5537     int byte2 = ((~nds_enc) & 0xf) << 3;
5538     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
5539     emit_int8(byte2);
5540   } else {
5541     prefix(VEX_2bytes);
5542 
5543     int byte1 = vex_r ? VEX_R : 0;
5544     byte1 = (~byte1) & 0x80;
5545     byte1 |= ((~nds_enc) & 0xf) << 3;
5546     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
5547     emit_int8(byte1);
5548   }
5549 }
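     // Prefix layout sketch (per the Intel SDM) for the two VEX forms emitted above:
     //   2-byte: C5 | ~R vvvv L pp
     //   3-byte: C4 | ~R ~X ~B mmmmm | W vvvv L pp
     // The 3-byte form is required whenever X, B or W must be expressed or when the
     // opcode map is 0F 38 / 0F 3A (mmmmm selects the map); vvvv is stored inverted.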
5550 
5551 // This is a 4 byte encoding
5552 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
5553                             int nds_enc, VexSimdPrefix pre, VexOpcode opc,
5554                             bool is_extended_context, bool is_merge_context,
5555                             int vector_len, bool no_mask_reg ){
5556   // EVEX 0x62 prefix
5557   prefix(EVEX_4bytes);
5558   evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
5559 
5560   // P0: byte 2, laid out as RXBR'00mm; the R, X, B and R' bits are built in
5561   // their positive sense here and then inverted, as the encoding requires
5562   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
5563   byte2 = (~byte2) & 0xF0;
5564   // confine opc opcode extensions in mm bits to lower two bits
5565   // of form {0F, 0F_38, 0F_3A}
5566   byte2 |= opc;
5567   emit_int8(byte2);
5568 
5569   // P1: byte 3 as Wvvvv1pp
5570   int byte3 = ((~nds_enc) & 0xf) << 3;
5571   // p[10] is always 1
5572   byte3 |= EVEX_F;
5573   byte3 |= (vex_w & 1) << 7;
5574   // confine pre opcode extensions in pp bits to lower two bits
5575   // of form {66, F3, F2}
5576   byte3 |= pre;
5577   emit_int8(byte3);
5578 
5579   // P2: byte 4 as zL'Lbv'aaa
5580   int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
5581   // EVEX.v` for extending EVEX.vvvv or VIDX
5582   byte4 |= (evex_v ? 0: EVEX_V);
5584   // third EVEX.b for broadcast actions
5584   byte4 |= (is_extended_context ? EVEX_Rb : 0);
5585   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
5586   byte4 |= ((vector_len) & 0x3) << 5;
5587   // last is EVEX.z for zero/merge actions
5588   byte4 |= (is_merge_context ? EVEX_Z : 0);
5589   emit_int8(byte4);
5590 }
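     // Prefix layout sketch (per the Intel SDM) for the 4-byte EVEX prefix built above:
     //   62 | P0: R X B R' 0 0 m m   (R, X, B, R' stored inverted; mm selects the opcode map)
     //      | P1: W v v v v 1 p p    (vvvv stored inverted; pp is the SIMD prefix)
     //      | P2: z L' L b V' a a a  (mask register in aaa, vector length in L'L)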
5591 
5592 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
5593                            VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
5594   bool vex_r = (xreg_enc >= 8);
5595   bool vex_b = adr.base_needs_rex();
5596   bool vex_x = adr.index_needs_rex();
5597   avx_vector_len = vector_len;
5598 
5599   // without AVX-512VL support, revert to legacy AVX encoding for vectors smaller than AVX_512bit
5600   if (VM_Version::supports_avx512vl() == false) {
5601     switch (vector_len) {
5602     case AVX_128bit:
5603     case AVX_256bit:
5604       legacy_mode = true;
5605       break;
5606     }
5607   }
5608 
5609   if ((UseAVX > 2) && (legacy_mode == false))
5610   {
5611     bool evex_r = (xreg_enc >= 16);
5612     bool evex_v = (nds_enc >= 16);
5613     is_evex_instruction = true;
5614     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
5615   } else {
5616     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
5617   }
5618 }
5619 
5620 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
5621                                      bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
5622   bool vex_r = (dst_enc >= 8);
5623   bool vex_b = (src_enc >= 8);
5624   bool vex_x = false;
5625   avx_vector_len = vector_len;
5626 
5627   // without AVX-512VL support, revert to legacy AVX encoding for vectors smaller than AVX_512bit
5628   if (VM_Version::supports_avx512vl() == false) {
5629     switch (vector_len) {
5630     case AVX_128bit:
5631     case AVX_256bit:
5632       legacy_mode = true;
5633       break;
5634     }
5635   }
5636 
5637   if ((UseAVX > 2) && (legacy_mode == false))
5638   {
5639     bool evex_r = (dst_enc >= 16);
5640     bool evex_v = (nds_enc >= 16);
5641     // can use vex_x as bank extender on rm encoding
5642     vex_x = (src_enc >= 16);
5643     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
5644   } else {
5645     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
5646   }
5647 
5648   // return modrm byte components for operands
5649   return (((dst_enc & 7) << 3) | (src_enc & 7));
5650 }
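     // The value returned above is the reg/rm portion of a ModRM byte; callers
     // typically turn it into a register-direct operand, e.g. (illustrative)
     //   emit_int8((unsigned char)(0xC0 | encode));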
5651 
5652 
5653 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
5654                             bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
5655   if (UseAVX > 0) {
5656     int xreg_enc = xreg->encoding();
5657     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
5658     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
5659   } else {
5660     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
5661     rex_prefix(adr, xreg, pre, opc, rex_w);
5662   }
5663 }
5664 
5665 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
5666                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
5667   int dst_enc = dst->encoding();
5668   int src_enc = src->encoding();
5669   if (UseAVX > 0) {
5670     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5671     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
5672   } else {
5673     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
5674     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
5675   }
5676 }
5677 
5678 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
5679                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
5680   int dst_enc = dst->encoding();
5681   int src_enc = src->encoding();
5682   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5683   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
5684 }
5685 
5686 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
5687                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
5688   int dst_enc = dst->encoding();
5689   int src_enc = src->encoding();
5690   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5691   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
5692 }
5693 
5694 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
5695   InstructionMark im(this);
5696   simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
5697   emit_int8(opcode);
5698   emit_operand(dst, src);
5699 }
5700 
5701 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
5702   InstructionMark im(this);
5703   simd_prefix_q(dst, dst, src, pre, no_mask_reg);
5704   emit_int8(opcode);
5705   emit_operand(dst, src);
5706 }
5707 
5708 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
5709   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
5710   emit_int8(opcode);
5711   emit_int8((unsigned char)(0xC0 | encode));
5712 }
5713 
5714 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
5715   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
5716   emit_int8(opcode);
5717   emit_int8((unsigned char)(0xC0 | encode));
5718 }
5719 
5720 // Versions with no second source register (non-destructive source).
5721 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
5722   InstructionMark im(this);
5723   simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
5724   emit_int8(opcode);
5725   emit_operand(dst, src);
5726 }
5727 
5728 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
5729   InstructionMark im(this);
5730   simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
5731   emit_int8(opcode);
5732   emit_operand(dst, src);
5733 }
5734 
5735 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
5736   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
5737   emit_int8(opcode);
5738   emit_int8((unsigned char)(0xC0 | encode));
5739 }
5740 
5741 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
5742   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
5743   emit_int8(opcode);
5744   emit_int8((unsigned char)(0xC0 | encode));
5745 }
5746 
5747 // 3-operand AVX instructions
5748 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
5749                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
5750   InstructionMark im(this);
5751   vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
5752   emit_int8(opcode);
5753   emit_operand(dst, src);
5754 }
5755 
5756 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
5757                                  Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
5758   InstructionMark im(this);
5759   vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
5760   emit_int8(opcode);
5761   emit_operand(dst, src);
5762 }
5763 
5764 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
5765                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
5766   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, false, no_mask_reg);
5767   emit_int8(opcode);
5768   emit_int8((unsigned char)(0xC0 | encode));
5769 }
5770 
5771 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
5772                                  VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
5773   int src_enc = src->encoding();
5774   int dst_enc = dst->encoding();
5775   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5776   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
5777   emit_int8(opcode);
5778   emit_int8((unsigned char)(0xC0 | encode));
5779 }
5780 
5781 #ifndef _LP64
5782 
5783 void Assembler::incl(Register dst) {
5784   // Don't use it directly. Use MacroAssembler::incrementl() instead.
5785   emit_int8(0x40 | dst->encoding());
5786 }
5787 
5788 void Assembler::lea(Register dst, Address src) {
5789   leal(dst, src);
5790 }
5791 
5792 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
5793   InstructionMark im(this);
5794   emit_int8((unsigned char)0xC7);
5795   emit_operand(rax, dst);
5796   emit_data((int)imm32, rspec, 0);


6254 void Assembler::andq(Register dst, Address src) {
6255   InstructionMark im(this);
6256   prefixq(src, dst);
6257   emit_int8(0x23);
6258   emit_operand(dst, src);
6259 }
6260 
6261 void Assembler::andq(Register dst, Register src) {
6262   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6263   emit_arith(0x23, 0xC0, dst, src);
6264 }
6265 
6266 void Assembler::andnq(Register dst, Register src1, Register src2) {
6267   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6268   int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
6269   emit_int8((unsigned char)0xF2);
6270   emit_int8((unsigned char)(0xC0 | encode));
6271 }
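     // Semantics note (per the BMI1 spec): andn computes dst = ~src1 & src2, i.e. it
     // clears in src2 every bit that is set in src1, without a separate not.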
6272 
6273 void Assembler::andnq(Register dst, Register src1, Address src2) {
6274   if (VM_Version::supports_evex()) {
6275     tuple_type = EVEX_T1S;
6276     input_size_in_bits = EVEX_64bit;
6277   }
6278   InstructionMark im(this);
6279   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6280   vex_prefix_0F38_q(dst, src1, src2);
6281   emit_int8((unsigned char)0xF2);
6282   emit_operand(dst, src2);
6283 }
6284 
6285 void Assembler::bsfq(Register dst, Register src) {
6286   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6287   emit_int8(0x0F);
6288   emit_int8((unsigned char)0xBC);
6289   emit_int8((unsigned char)(0xC0 | encode));
6290 }
6291 
6292 void Assembler::bsrq(Register dst, Register src) {
6293   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6294   emit_int8(0x0F);
6295   emit_int8((unsigned char)0xBD);
6296   emit_int8((unsigned char)(0xC0 | encode));
6297 }


6399   emit_arith(0x3B, 0xC0, dst, src);
6400 }
6401 
6402 void Assembler::cmpq(Register dst, Address  src) {
6403   InstructionMark im(this);
6404   prefixq(src, dst);
6405   emit_int8(0x3B);
6406   emit_operand(dst, src);
6407 }
6408 
6409 void Assembler::cmpxchgq(Register reg, Address adr) {
6410   InstructionMark im(this);
6411   prefixq(adr, reg);
6412   emit_int8(0x0F);
6413   emit_int8((unsigned char)0xB1);
6414   emit_operand(reg, adr);
6415 }
6416 
6417 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
6418   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6419   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
6420   emit_int8(0x2A);
6421   emit_int8((unsigned char)(0xC0 | encode));
6422 }
6423 
6424 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
6425   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6426   if (VM_Version::supports_evex()) {
6427     tuple_type = EVEX_T1S;
6428     input_size_in_bits = EVEX_64bit;
6429   }
6430   InstructionMark im(this);
6431   simd_prefix_q(dst, dst, src, VEX_SIMD_F2, true);
6432   emit_int8(0x2A);
6433   emit_operand(dst, src);
6434 }
6435 
6436 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
6437   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6438   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true);
6439   emit_int8(0x2A);
6440   emit_int8((unsigned char)(0xC0 | encode));
6441 }
6442 
6443 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
6444   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6445   if (VM_Version::supports_evex()) {
6446     tuple_type = EVEX_T1S;
6447     input_size_in_bits = EVEX_64bit;
6448   }
6449   InstructionMark im(this);
6450   simd_prefix_q(dst, dst, src, VEX_SIMD_F3, true);
6451   emit_int8(0x2A);
6452   emit_operand(dst, src);
6453 }
6454 
6455 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
6456   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6457   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
6458   emit_int8(0x2C);
6459   emit_int8((unsigned char)(0xC0 | encode));
6460 }
6461 
6462 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
6463   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6464   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
6465   emit_int8(0x2C);
6466   emit_int8((unsigned char)(0xC0 | encode));
6467 }
6468 
6469 void Assembler::decl(Register dst) {
6470   // Don't use it directly. Use MacroAssembler::decrementl() instead.
6471   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
6472   int encode = prefix_and_encode(dst->encoding());
6473   emit_int8((unsigned char)0xFF);
6474   emit_int8((unsigned char)(0xC8 | encode));
6475 }
6476 
6477 void Assembler::decq(Register dst) {
6478   // Don't use it directly. Use MacroAssembler::decrementq() instead.
6479   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
6480   int encode = prefixq_and_encode(dst->encoding());
6481   emit_int8((unsigned char)0xFF);
6482   emit_int8((unsigned char)(0xC8 | encode));
6483 }
6484 


6613 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
6614   InstructionMark im(this);
6615   prefix(src1);
6616   emit_int8((unsigned char)0x81);
6617   emit_operand(rax, src1, 4);
6618   emit_data((int)imm32, rspec, narrow_oop_operand);
6619 }
6620 
6621 void Assembler::lzcntq(Register dst, Register src) {
6622   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
6623   emit_int8((unsigned char)0xF3);
6624   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6625   emit_int8(0x0F);
6626   emit_int8((unsigned char)0xBD);
6627   emit_int8((unsigned char)(0xC0 | encode));
6628 }
6629 
6630 void Assembler::movdq(XMMRegister dst, Register src) {
6631   // table D-1 says MMX/SSE2
6632   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6633   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, true);
6634   emit_int8(0x6E);
6635   emit_int8((unsigned char)(0xC0 | encode));
6636 }
6637 
6638 void Assembler::movdq(Register dst, XMMRegister src) {
6639   // table D-1 says MMX/SSE2
6640   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6641   // swap src/dst to get correct prefix
6642   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, true);
6643   emit_int8(0x7E);
6644   emit_int8((unsigned char)(0xC0 | encode));
6645 }
6646 
6647 void Assembler::movq(Register dst, Register src) {
6648   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6649   emit_int8((unsigned char)0x8B);
6650   emit_int8((unsigned char)(0xC0 | encode));
6651 }
6652 
6653 void Assembler::movq(Register dst, Address src) {
6654   InstructionMark im(this);
6655   prefixq(src, dst);
6656   emit_int8((unsigned char)0x8B);
6657   emit_operand(dst, src);
6658 }
6659 
6660 void Assembler::movq(Address dst, Register src) {
6661   InstructionMark im(this);
6662   prefixq(dst, src);


6755   emit_int8((unsigned char)0x0F);
6756   emit_int8((unsigned char)0xB7);
6757   emit_int8((unsigned char)(0xC0 | encode));
6758 }
6759 
6760 void Assembler::mulq(Address src) {
6761   InstructionMark im(this);
6762   prefixq(src);
6763   emit_int8((unsigned char)0xF7);
6764   emit_operand(rsp, src);
6765 }
6766 
6767 void Assembler::mulq(Register src) {
6768   int encode = prefixq_and_encode(src->encoding());
6769   emit_int8((unsigned char)0xF7);
6770   emit_int8((unsigned char)(0xE0 | encode));
6771 }
6772 
6773 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
6774   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
6775   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(),
6776                                      VEX_SIMD_F2, VEX_OPCODE_0F_38, true, AVX_128bit, true, false);
6777   emit_int8((unsigned char)0xF6);
6778   emit_int8((unsigned char)(0xC0 | encode));
6779 }
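     // Semantics note (per the BMI2 spec): mulx multiplies src by RDX without touching
     // the flags; the first destination (dst1) receives the high 64 bits of the product
     // and the second (dst2) the low 64 bits.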
6780 
6781 void Assembler::negq(Register dst) {
6782   int encode = prefixq_and_encode(dst->encoding());
6783   emit_int8((unsigned char)0xF7);
6784   emit_int8((unsigned char)(0xD8 | encode));
6785 }
6786 
6787 void Assembler::notq(Register dst) {
6788   int encode = prefixq_and_encode(dst->encoding());
6789   emit_int8((unsigned char)0xF7);
6790   emit_int8((unsigned char)(0xD0 | encode));
6791 }
6792 
6793 void Assembler::orq(Address dst, int32_t imm32) {
6794   InstructionMark im(this);
6795   prefixq(dst);
6796   emit_int8((unsigned char)0x81);


6905     emit_int8((unsigned char)(0xD0 | encode));
6906     emit_int8(imm8);
6907   }
6908 }
6909 
6910 void Assembler::rorq(Register dst, int imm8) {
6911   assert(isShiftCount(imm8 >> 1), "illegal shift count");
6912   int encode = prefixq_and_encode(dst->encoding());
6913   if (imm8 == 1) {
6914     emit_int8((unsigned char)0xD1);
6915     emit_int8((unsigned char)(0xC8 | encode));
6916   } else {
6917     emit_int8((unsigned char)0xC1);
6918     emit_int8((unsigned char)(0xC8 | encode));
6919     emit_int8(imm8);
6920   }
6921 }
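     // Note on the shift-count asserts in the 64-bit shift/rotate forms (assuming
     // isShiftCount() checks a 0..31 count, as the 32-bit forms do): testing
     // imm8 >> 1 admits the full 0..63 range that 64-bit shifts and rotates take.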
6922 
6923 void Assembler::rorxq(Register dst, Register src, int imm8) {
6924   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
6925   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2,
6926                                      VEX_OPCODE_0F_3A, true, AVX_128bit, true, false);
6927   emit_int8((unsigned char)0xF0);
6928   emit_int8((unsigned char)(0xC0 | encode));
6929   emit_int8(imm8);
6930 }
6931 
6932 void Assembler::sarq(Register dst, int imm8) {
6933   assert(isShiftCount(imm8 >> 1), "illegal shift count");
6934   int encode = prefixq_and_encode(dst->encoding());
6935   if (imm8 == 1) {
6936     emit_int8((unsigned char)0xD1);
6937     emit_int8((unsigned char)(0xF8 | encode));
6938   } else {
6939     emit_int8((unsigned char)0xC1);
6940     emit_int8((unsigned char)(0xF8 | encode));
6941     emit_int8(imm8);
6942   }
6943 }
6944 
6945 void Assembler::sarq(Register dst) {
6946   int encode = prefixq_and_encode(dst->encoding());

