1 /*
   2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "gc/shared/cardTableBarrierSet.hpp"
  29 #include "gc/shared/collectedHeap.inline.hpp"
  30 #include "interpreter/interpreter.hpp"
  31 #include "memory/resourceArea.hpp"
  32 #include "prims/methodHandles.hpp"
  33 #include "runtime/biasedLocking.hpp"
  34 #include "runtime/objectMonitor.hpp"
  35 #include "runtime/os.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/stubRoutines.hpp"
  38 #include "utilities/macros.hpp"
  39 
  40 #ifdef PRODUCT
  41 #define BLOCK_COMMENT(str) /* nothing */
  42 #define STOP(error) stop(error)
  43 #else
  44 #define BLOCK_COMMENT(str) block_comment(str)
  45 #define STOP(error) block_comment(error); stop(error)
  46 #endif
  47 
  48 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  49 // Implementation of AddressLiteral
  50 
  51 // A 2-D table for managing compressed displacement(disp8) on EVEX enabled platforms.
  52 unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  53   // -----------------Table 4.5 -------------------- //
  54   16, 32, 64,  // EVEX_FV(0)
  55   4,  4,  4,   // EVEX_FV(1) - with Evex.b
  56   16, 32, 64,  // EVEX_FV(2) - with Evex.w
  57   8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  58   8,  16, 32,  // EVEX_HV(0)
  59   4,  4,  4,   // EVEX_HV(1) - with Evex.b
  60   // -----------------Table 4.6 -------------------- //
  61   16, 32, 64,  // EVEX_FVM(0)
  62   1,  1,  1,   // EVEX_T1S(0)
  63   2,  2,  2,   // EVEX_T1S(1)
  64   4,  4,  4,   // EVEX_T1S(2)
  65   8,  8,  8,   // EVEX_T1S(3)
  66   4,  4,  4,   // EVEX_T1F(0)
  67   8,  8,  8,   // EVEX_T1F(1)
  68   8,  8,  8,   // EVEX_T2(0)
  69   0,  16, 16,  // EVEX_T2(1)
  70   0,  16, 16,  // EVEX_T4(0)
  71   0,  0,  32,  // EVEX_T4(1)
  72   0,  0,  32,  // EVEX_T8(0)
  73   8,  16, 32,  // EVEX_HVM(0)
  74   4,  8,  16,  // EVEX_QVM(0)
  75   2,  4,  8,   // EVEX_OVM(0)
  76   16, 16, 16,  // EVEX_M128(0)
  77   8,  32, 64,  // EVEX_DUP(0)
  78   0,  0,  0    // EVEX_NTUP
  79 };
  80 
  81 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  82   _is_lval = false;
  83   _target = target;
  84   switch (rtype) {
  85   case relocInfo::oop_type:
  86   case relocInfo::metadata_type:
  87     // Oops are a special case. Normally they would be their own section
  88     // but in cases like icBuffer they are literals in the code stream that
  89     // we don't have a section for. We use none so that we get a literal address
  90     // which is always patchable.
  91     break;
  92   case relocInfo::external_word_type:
  93     _rspec = external_word_Relocation::spec(target);
  94     break;
  95   case relocInfo::internal_word_type:
  96     _rspec = internal_word_Relocation::spec(target);
  97     break;
  98   case relocInfo::opt_virtual_call_type:
  99     _rspec = opt_virtual_call_Relocation::spec();
 100     break;
 101   case relocInfo::static_call_type:
 102     _rspec = static_call_Relocation::spec();
 103     break;
 104   case relocInfo::runtime_call_type:
 105     _rspec = runtime_call_Relocation::spec();
 106     break;
 107   case relocInfo::poll_type:
 108   case relocInfo::poll_return_type:
 109     _rspec = Relocation::spec_simple(rtype);
 110     break;
 111   case relocInfo::none:
 112     break;
 113   default:
 114     ShouldNotReachHere();
 115     break;
 116   }
 117 }
 118 
 119 // Implementation of Address
 120 
 121 #ifdef _LP64
 122 
 123 Address Address::make_array(ArrayAddress adr) {
 124   // Not implementable on 64bit machines
 125   // Should have been handled higher up the call chain.
 126   ShouldNotReachHere();
 127   return Address();
 128 }
 129 
 130 // exceedingly dangerous constructor
 131 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
 132   _base  = noreg;
 133   _index = noreg;
 134   _scale = no_scale;
 135   _disp  = disp;
 136   switch (rtype) {
 137     case relocInfo::external_word_type:
 138       _rspec = external_word_Relocation::spec(loc);
 139       break;
 140     case relocInfo::internal_word_type:
 141       _rspec = internal_word_Relocation::spec(loc);
 142       break;
 143     case relocInfo::runtime_call_type:
 144       // HMM
 145       _rspec = runtime_call_Relocation::spec();
 146       break;
 147     case relocInfo::poll_type:
 148     case relocInfo::poll_return_type:
 149       _rspec = Relocation::spec_simple(rtype);
 150       break;
 151     case relocInfo::none:
 152       break;
 153     default:
 154       ShouldNotReachHere();
 155   }
 156 }
 157 #else // LP64
 158 
 159 Address Address::make_array(ArrayAddress adr) {
 160   AddressLiteral base = adr.base();
 161   Address index = adr.index();
 162   assert(index._disp == 0, "must not have disp"); // maybe it can?
 163   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
 164   array._rspec = base._rspec;
 165   return array;
 166 }
 167 
 168 // exceedingly dangerous constructor
 169 Address::Address(address loc, RelocationHolder spec) {
 170   _base  = noreg;
 171   _index = noreg;
 172   _scale = no_scale;
 173   _disp  = (intptr_t) loc;
 174   _rspec = spec;
 175 }
 176 
 177 #endif // _LP64
 178 
 179 
 180 
 181 // Convert the raw encoding form into the form expected by the constructor for
 182 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 183 // that to noreg for the Address constructor.
 184 Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
 185   RelocationHolder rspec;
 186   if (disp_reloc != relocInfo::none) {
 187     rspec = Relocation::spec_simple(disp_reloc);
 188   }
 189   bool valid_index = index != rsp->encoding();
 190   if (valid_index) {
 191     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 192     madr._rspec = rspec;
 193     return madr;
 194   } else {
 195     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 196     madr._rspec = rspec;
 197     return madr;
 198   }
 199 }
 200 
 201 // Implementation of Assembler
 202 
 203 int AbstractAssembler::code_fill_byte() {
 204   return (u_char)'\xF4'; // hlt
 205 }
 206 
 207 // make this go away someday
 208 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
 209   if (rtype == relocInfo::none)
 210     emit_int32(data);
 211   else
 212     emit_data(data, Relocation::spec_simple(rtype), format);
 213 }
 214 
 215 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
 216   assert(imm_operand == 0, "default format must be immediate in this file");
 217   assert(inst_mark() != NULL, "must be inside InstructionMark");
 218   if (rspec.type() !=  relocInfo::none) {
 219     #ifdef ASSERT
 220       check_relocation(rspec, format);
 221     #endif
 222     // Do not use AbstractAssembler::relocate, which is not intended for
 223     // embedded words.  Instead, relocate to the enclosing instruction.
 224 
 225     // hack. call32 is too wide for mask so use disp32
 226     if (format == call32_operand)
 227       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 228     else
 229       code_section()->relocate(inst_mark(), rspec, format);
 230   }
 231   emit_int32(data);
 232 }
 233 
 234 static int encode(Register r) {
 235   int enc = r->encoding();
 236   if (enc >= 8) {
 237     enc -= 8;
 238   }
 239   return enc;
 240 }
 241 
 242 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
 243   assert(dst->has_byte_register(), "must have byte register");
 244   assert(isByte(op1) && isByte(op2), "wrong opcode");
 245   assert(isByte(imm8), "not a byte");
 246   assert((op1 & 0x01) == 0, "should be 8bit operation");
 247   emit_int8(op1);
 248   emit_int8(op2 | encode(dst));
 249   emit_int8(imm8);
 250 }
 251 
 252 
 253 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
 254   assert(isByte(op1) && isByte(op2), "wrong opcode");
 255   assert((op1 & 0x01) == 1, "should be 32bit operation");
 256   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 257   if (is8bit(imm32)) {
 258     emit_int8(op1 | 0x02); // set sign bit
 259     emit_int8(op2 | encode(dst));
 260     emit_int8(imm32 & 0xFF);
 261   } else {
 262     emit_int8(op1);
 263     emit_int8(op2 | encode(dst));
 264     emit_int32(imm32);
 265   }
 266 }
 267 
 268 // Force generation of a 4 byte immediate value even if it fits into 8bit
 269 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
 270   assert(isByte(op1) && isByte(op2), "wrong opcode");
 271   assert((op1 & 0x01) == 1, "should be 32bit operation");
 272   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 273   emit_int8(op1);
 274   emit_int8(op2 | encode(dst));
 275   emit_int32(imm32);
 276 }
 277 
 278 // immediate-to-memory forms
 279 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
 280   assert((op1 & 0x01) == 1, "should be 32bit operation");
 281   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 282   if (is8bit(imm32)) {
 283     emit_int8(op1 | 0x02); // set sign bit
 284     emit_operand(rm, adr, 1);
 285     emit_int8(imm32 & 0xFF);
 286   } else {
 287     emit_int8(op1);
 288     emit_operand(rm, adr, 4);
 289     emit_int32(imm32);
 290   }
 291 }
 292 
 293 
 294 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 295   assert(isByte(op1) && isByte(op2), "wrong opcode");
 296   emit_int8(op1);
 297   emit_int8(op2 | encode(dst) << 3 | encode(src));
 298 }
 299 
 300 
 301 bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
 302                                            int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
 303   int mod_idx = 0;
 304   // We will test if the displacement fits the compressed format and if so
 305   // apply the compression to the displacment iff the result is8bit.
 306   if (VM_Version::supports_evex() && is_evex_inst) {
 307     switch (cur_tuple_type) {
 308     case EVEX_FV:
 309       if ((cur_encoding & VEX_W) == VEX_W) {
 310         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
 311       } else {
 312         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 313       }
 314       break;
 315 
 316     case EVEX_HV:
 317       mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 318       break;
 319 
 320     case EVEX_FVM:
 321       break;
 322 
 323     case EVEX_T1S:
 324       switch (in_size_in_bits) {
 325       case EVEX_8bit:
 326         break;
 327 
 328       case EVEX_16bit:
 329         mod_idx = 1;
 330         break;
 331 
 332       case EVEX_32bit:
 333         mod_idx = 2;
 334         break;
 335 
 336       case EVEX_64bit:
 337         mod_idx = 3;
 338         break;
 339       }
 340       break;
 341 
 342     case EVEX_T1F:
 343     case EVEX_T2:
 344     case EVEX_T4:
 345       mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
 346       break;
 347 
 348     case EVEX_T8:
 349       break;
 350 
 351     case EVEX_HVM:
 352       break;
 353 
 354     case EVEX_QVM:
 355       break;
 356 
 357     case EVEX_OVM:
 358       break;
 359 
 360     case EVEX_M128:
 361       break;
 362 
 363     case EVEX_DUP:
 364       break;
 365 
 366     default:
 367       assert(0, "no valid evex tuple_table entry");
 368       break;
 369     }
 370 
 371     if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
 372       int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
 373       if ((disp % disp_factor) == 0) {
 374         int new_disp = disp / disp_factor;
 375         if ((-0x80 <= new_disp && new_disp < 0x80)) {
 376           disp = new_disp;
 377         }
 378       } else {
 379         return false;
 380       }
 381     }
 382   }
 383   return (-0x80 <= disp && disp < 0x80);
 384 }
 385 
 386 
 387 bool Assembler::emit_compressed_disp_byte(int &disp) {
 388   int mod_idx = 0;
 389   // We will test if the displacement fits the compressed format and if so
 390   // apply the compression to the displacment iff the result is8bit.
 391   if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
 392     int evex_encoding = _attributes->get_evex_encoding();
 393     int tuple_type = _attributes->get_tuple_type();
 394     switch (tuple_type) {
 395     case EVEX_FV:
 396       if ((evex_encoding & VEX_W) == VEX_W) {
 397         mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
 398       } else {
 399         mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 400       }
 401       break;
 402 
 403     case EVEX_HV:
 404       mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 405       break;
 406 
 407     case EVEX_FVM:
 408       break;
 409 
 410     case EVEX_T1S:
 411       switch (_attributes->get_input_size()) {
 412       case EVEX_8bit:
 413         break;
 414 
 415       case EVEX_16bit:
 416         mod_idx = 1;
 417         break;
 418 
 419       case EVEX_32bit:
 420         mod_idx = 2;
 421         break;
 422 
 423       case EVEX_64bit:
 424         mod_idx = 3;
 425         break;
 426       }
 427       break;
 428 
 429     case EVEX_T1F:
 430     case EVEX_T2:
 431     case EVEX_T4:
 432       mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
 433       break;
 434 
 435     case EVEX_T8:
 436       break;
 437 
 438     case EVEX_HVM:
 439       break;
 440 
 441     case EVEX_QVM:
 442       break;
 443 
 444     case EVEX_OVM:
 445       break;
 446 
 447     case EVEX_M128:
 448       break;
 449 
 450     case EVEX_DUP:
 451       break;
 452 
 453     default:
 454       assert(0, "no valid evex tuple_table entry");
 455       break;
 456     }
 457 
 458     int vector_len = _attributes->get_vector_len();
 459     if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
 460       int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
 461       if ((disp % disp_factor) == 0) {
 462         int new_disp = disp / disp_factor;
 463         if (is8bit(new_disp)) {
 464           disp = new_disp;
 465         }
 466       } else {
 467         return false;
 468       }
 469     }
 470   }
 471   return is8bit(disp);
 472 }
 473 
 474 
 475 void Assembler::emit_operand(Register reg, Register base, Register index,
 476                              Address::ScaleFactor scale, int disp,
 477                              RelocationHolder const& rspec,
 478                              int rip_relative_correction) {
 479   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
 480 
 481   // Encode the registers as needed in the fields they are used in
 482 
 483   int regenc = encode(reg) << 3;
 484   int indexenc = index->is_valid() ? encode(index) << 3 : 0;
 485   int baseenc = base->is_valid() ? encode(base) : 0;
 486 
 487   if (base->is_valid()) {
 488     if (index->is_valid()) {
 489       assert(scale != Address::no_scale, "inconsistent address");
 490       // [base + index*scale + disp]
 491       if (disp == 0 && rtype == relocInfo::none  &&
 492           base != rbp LP64_ONLY(&& base != r13)) {
 493         // [base + index*scale]
 494         // [00 reg 100][ss index base]
 495         assert(index != rsp, "illegal addressing mode");
 496         emit_int8(0x04 | regenc);
 497         emit_int8(scale << 6 | indexenc | baseenc);
 498       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 499         // [base + index*scale + imm8]
 500         // [01 reg 100][ss index base] imm8
 501         assert(index != rsp, "illegal addressing mode");
 502         emit_int8(0x44 | regenc);
 503         emit_int8(scale << 6 | indexenc | baseenc);
 504         emit_int8(disp & 0xFF);
 505       } else {
 506         // [base + index*scale + disp32]
 507         // [10 reg 100][ss index base] disp32
 508         assert(index != rsp, "illegal addressing mode");
 509         emit_int8(0x84 | regenc);
 510         emit_int8(scale << 6 | indexenc | baseenc);
 511         emit_data(disp, rspec, disp32_operand);
 512       }
 513     } else if (base == rsp LP64_ONLY(|| base == r12)) {
 514       // [rsp + disp]
 515       if (disp == 0 && rtype == relocInfo::none) {
 516         // [rsp]
 517         // [00 reg 100][00 100 100]
 518         emit_int8(0x04 | regenc);
 519         emit_int8(0x24);
 520       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 521         // [rsp + imm8]
 522         // [01 reg 100][00 100 100] disp8
 523         emit_int8(0x44 | regenc);
 524         emit_int8(0x24);
 525         emit_int8(disp & 0xFF);
 526       } else {
 527         // [rsp + imm32]
 528         // [10 reg 100][00 100 100] disp32
 529         emit_int8(0x84 | regenc);
 530         emit_int8(0x24);
 531         emit_data(disp, rspec, disp32_operand);
 532       }
 533     } else {
 534       // [base + disp]
 535       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
 536       if (disp == 0 && rtype == relocInfo::none &&
 537           base != rbp LP64_ONLY(&& base != r13)) {
 538         // [base]
 539         // [00 reg base]
 540         emit_int8(0x00 | regenc | baseenc);
 541       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 542         // [base + disp8]
 543         // [01 reg base] disp8
 544         emit_int8(0x40 | regenc | baseenc);
 545         emit_int8(disp & 0xFF);
 546       } else {
 547         // [base + disp32]
 548         // [10 reg base] disp32
 549         emit_int8(0x80 | regenc | baseenc);
 550         emit_data(disp, rspec, disp32_operand);
 551       }
 552     }
 553   } else {
 554     if (index->is_valid()) {
 555       assert(scale != Address::no_scale, "inconsistent address");
 556       // [index*scale + disp]
 557       // [00 reg 100][ss index 101] disp32
 558       assert(index != rsp, "illegal addressing mode");
 559       emit_int8(0x04 | regenc);
 560       emit_int8(scale << 6 | indexenc | 0x05);
 561       emit_data(disp, rspec, disp32_operand);
 562     } else if (rtype != relocInfo::none ) {
 563       // [disp] (64bit) RIP-RELATIVE (32bit) abs
 564       // [00 000 101] disp32
 565 
 566       emit_int8(0x05 | regenc);
 567       // Note that the RIP-rel. correction applies to the generated
 568       // disp field, but _not_ to the target address in the rspec.
 569 
 570       // disp was created by converting the target address minus the pc
 571       // at the start of the instruction. That needs more correction here.
 572       // intptr_t disp = target - next_ip;
 573       assert(inst_mark() != NULL, "must be inside InstructionMark");
 574       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
 575       int64_t adjusted = disp;
 576       // Do rip-rel adjustment for 64bit
 577       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
 578       assert(is_simm32(adjusted),
 579              "must be 32bit offset (RIP relative address)");
 580       emit_data((int32_t) adjusted, rspec, disp32_operand);
 581 
 582     } else {
 583       // 32bit never did this, did everything as the rip-rel/disp code above
 584       // [disp] ABSOLUTE
 585       // [00 reg 100][00 100 101] disp32
 586       emit_int8(0x04 | regenc);
 587       emit_int8(0x25);
 588       emit_data(disp, rspec, disp32_operand);
 589     }
 590   }
 591 }
 592 
 593 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 594                              Address::ScaleFactor scale, int disp,
 595                              RelocationHolder const& rspec) {
 596   if (UseAVX > 2) {
 597     int xreg_enc = reg->encoding();
 598     if (xreg_enc > 15) {
 599       XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
 600       emit_operand((Register)new_reg, base, index, scale, disp, rspec);
 601       return;
 602     }
 603   }
 604   emit_operand((Register)reg, base, index, scale, disp, rspec);
 605 }
 606 
 607 // Secret local extension to Assembler::WhichOperand:
 608 #define end_pc_operand (_WhichOperand_limit)
 609 
 610 address Assembler::locate_operand(address inst, WhichOperand which) {
 611   // Decode the given instruction, and return the address of
 612   // an embedded 32-bit operand word.
 613 
 614   // If "which" is disp32_operand, selects the displacement portion
 615   // of an effective address specifier.
 616   // If "which" is imm64_operand, selects the trailing immediate constant.
 617   // If "which" is call32_operand, selects the displacement of a call or jump.
 618   // Caller is responsible for ensuring that there is such an operand,
 619   // and that it is 32/64 bits wide.
 620 
 621   // If "which" is end_pc_operand, find the end of the instruction.
 622 
 623   address ip = inst;
 624   bool is_64bit = false;
 625 
 626   debug_only(bool has_disp32 = false);
 627   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
 628 
 629   again_after_prefix:
 630   switch (0xFF & *ip++) {
 631 
 632   // These convenience macros generate groups of "case" labels for the switch.
 633 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
 634 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
 635              case (x)+4: case (x)+5: case (x)+6: case (x)+7
 636 #define REP16(x) REP8((x)+0): \
 637               case REP8((x)+8)
 638 
 639   case CS_segment:
 640   case SS_segment:
 641   case DS_segment:
 642   case ES_segment:
 643   case FS_segment:
 644   case GS_segment:
 645     // Seems dubious
 646     LP64_ONLY(assert(false, "shouldn't have that prefix"));
 647     assert(ip == inst+1, "only one prefix allowed");
 648     goto again_after_prefix;
 649 
 650   case 0x67:
 651   case REX:
 652   case REX_B:
 653   case REX_X:
 654   case REX_XB:
 655   case REX_R:
 656   case REX_RB:
 657   case REX_RX:
 658   case REX_RXB:
 659     NOT_LP64(assert(false, "64bit prefixes"));
 660     goto again_after_prefix;
 661 
 662   case REX_W:
 663   case REX_WB:
 664   case REX_WX:
 665   case REX_WXB:
 666   case REX_WR:
 667   case REX_WRB:
 668   case REX_WRX:
 669   case REX_WRXB:
 670     NOT_LP64(assert(false, "64bit prefixes"));
 671     is_64bit = true;
 672     goto again_after_prefix;
 673 
 674   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
 675   case 0x88: // movb a, r
 676   case 0x89: // movl a, r
 677   case 0x8A: // movb r, a
 678   case 0x8B: // movl r, a
 679   case 0x8F: // popl a
 680     debug_only(has_disp32 = true);
 681     break;
 682 
 683   case 0x68: // pushq #32
 684     if (which == end_pc_operand) {
 685       return ip + 4;
 686     }
 687     assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
 688     return ip;                  // not produced by emit_operand
 689 
 690   case 0x66: // movw ... (size prefix)
 691     again_after_size_prefix2:
 692     switch (0xFF & *ip++) {
 693     case REX:
 694     case REX_B:
 695     case REX_X:
 696     case REX_XB:
 697     case REX_R:
 698     case REX_RB:
 699     case REX_RX:
 700     case REX_RXB:
 701     case REX_W:
 702     case REX_WB:
 703     case REX_WX:
 704     case REX_WXB:
 705     case REX_WR:
 706     case REX_WRB:
 707     case REX_WRX:
 708     case REX_WRXB:
 709       NOT_LP64(assert(false, "64bit prefix found"));
 710       goto again_after_size_prefix2;
 711     case 0x8B: // movw r, a
 712     case 0x89: // movw a, r
 713       debug_only(has_disp32 = true);
 714       break;
 715     case 0xC7: // movw a, #16
 716       debug_only(has_disp32 = true);
 717       tail_size = 2;  // the imm16
 718       break;
 719     case 0x0F: // several SSE/SSE2 variants
 720       ip--;    // reparse the 0x0F
 721       goto again_after_prefix;
 722     default:
 723       ShouldNotReachHere();
 724     }
 725     break;
 726 
 727   case REP8(0xB8): // movl/q r, #32/#64(oop?)
 728     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
 729     // these asserts are somewhat nonsensical
 730 #ifndef _LP64
 731     assert(which == imm_operand || which == disp32_operand,
 732            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 733 #else
 734     assert((which == call32_operand || which == imm_operand) && is_64bit ||
 735            which == narrow_oop_operand && !is_64bit,
 736            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 737 #endif // _LP64
 738     return ip;
 739 
 740   case 0x69: // imul r, a, #32
 741   case 0xC7: // movl a, #32(oop?)
 742     tail_size = 4;
 743     debug_only(has_disp32 = true); // has both kinds of operands!
 744     break;
 745 
 746   case 0x0F: // movx..., etc.
 747     switch (0xFF & *ip++) {
 748     case 0x3A: // pcmpestri
 749       tail_size = 1;
 750     case 0x38: // ptest, pmovzxbw
 751       ip++; // skip opcode
 752       debug_only(has_disp32 = true); // has both kinds of operands!
 753       break;
 754 
 755     case 0x70: // pshufd r, r/a, #8
 756       debug_only(has_disp32 = true); // has both kinds of operands!
 757     case 0x73: // psrldq r, #8
 758       tail_size = 1;
 759       break;
 760 
 761     case 0x12: // movlps
 762     case 0x28: // movaps
 763     case 0x2E: // ucomiss
 764     case 0x2F: // comiss
 765     case 0x54: // andps
 766     case 0x55: // andnps
 767     case 0x56: // orps
 768     case 0x57: // xorps
 769     case 0x58: // addpd
 770     case 0x59: // mulpd
 771     case 0x6E: // movd
 772     case 0x7E: // movd
 773     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
 774     case 0xFE: // paddd
 775       debug_only(has_disp32 = true);
 776       break;
 777 
 778     case 0xAD: // shrd r, a, %cl
 779     case 0xAF: // imul r, a
 780     case 0xBE: // movsbl r, a (movsxb)
 781     case 0xBF: // movswl r, a (movsxw)
 782     case 0xB6: // movzbl r, a (movzxb)
 783     case 0xB7: // movzwl r, a (movzxw)
 784     case REP16(0x40): // cmovl cc, r, a
 785     case 0xB0: // cmpxchgb
 786     case 0xB1: // cmpxchg
 787     case 0xC1: // xaddl
 788     case 0xC7: // cmpxchg8
 789     case REP16(0x90): // setcc a
 790       debug_only(has_disp32 = true);
 791       // fall out of the switch to decode the address
 792       break;
 793 
 794     case 0xC4: // pinsrw r, a, #8
 795       debug_only(has_disp32 = true);
 796     case 0xC5: // pextrw r, r, #8
 797       tail_size = 1;  // the imm8
 798       break;
 799 
 800     case 0xAC: // shrd r, a, #8
 801       debug_only(has_disp32 = true);
 802       tail_size = 1;  // the imm8
 803       break;
 804 
 805     case REP16(0x80): // jcc rdisp32
 806       if (which == end_pc_operand)  return ip + 4;
 807       assert(which == call32_operand, "jcc has no disp32 or imm");
 808       return ip;
 809     default:
 810       ShouldNotReachHere();
 811     }
 812     break;
 813 
 814   case 0x81: // addl a, #32; addl r, #32
 815     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 816     // on 32bit in the case of cmpl, the imm might be an oop
 817     tail_size = 4;
 818     debug_only(has_disp32 = true); // has both kinds of operands!
 819     break;
 820 
 821   case 0x83: // addl a, #8; addl r, #8
 822     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 823     debug_only(has_disp32 = true); // has both kinds of operands!
 824     tail_size = 1;
 825     break;
 826 
 827   case 0x9B:
 828     switch (0xFF & *ip++) {
 829     case 0xD9: // fnstcw a
 830       debug_only(has_disp32 = true);
 831       break;
 832     default:
 833       ShouldNotReachHere();
 834     }
 835     break;
 836 
 837   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
 838   case REP4(0x10): // adc...
 839   case REP4(0x20): // and...
 840   case REP4(0x30): // xor...
 841   case REP4(0x08): // or...
 842   case REP4(0x18): // sbb...
 843   case REP4(0x28): // sub...
 844   case 0xF7: // mull a
 845   case 0x8D: // lea r, a
 846   case 0x87: // xchg r, a
 847   case REP4(0x38): // cmp...
 848   case 0x85: // test r, a
 849     debug_only(has_disp32 = true); // has both kinds of operands!
 850     break;
 851 
 852   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
 853   case 0xC6: // movb a, #8
 854   case 0x80: // cmpb a, #8
 855   case 0x6B: // imul r, a, #8
 856     debug_only(has_disp32 = true); // has both kinds of operands!
 857     tail_size = 1; // the imm8
 858     break;
 859 
 860   case 0xC4: // VEX_3bytes
 861   case 0xC5: // VEX_2bytes
 862     assert((UseAVX > 0), "shouldn't have VEX prefix");
 863     assert(ip == inst+1, "no prefixes allowed");
 864     // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
 865     // but they have prefix 0x0F and processed when 0x0F processed above.
 866     //
 867     // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
 868     // instructions (these instructions are not supported in 64-bit mode).
 869     // To distinguish them bits [7:6] are set in the VEX second byte since
 870     // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
 871     // those VEX bits REX and vvvv bits are inverted.
 872     //
 873     // Fortunately C2 doesn't generate these instructions so we don't need
 874     // to check for them in product version.
 875 
 876     // Check second byte
 877     NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
 878 
 879     int vex_opcode;
 880     // First byte
 881     if ((0xFF & *inst) == VEX_3bytes) {
 882       vex_opcode = VEX_OPCODE_MASK & *ip;
 883       ip++; // third byte
 884       is_64bit = ((VEX_W & *ip) == VEX_W);
 885     } else {
 886       vex_opcode = VEX_OPCODE_0F;
 887     }
 888     ip++; // opcode
 889     // To find the end of instruction (which == end_pc_operand).
 890     switch (vex_opcode) {
 891       case VEX_OPCODE_0F:
 892         switch (0xFF & *ip) {
 893         case 0x70: // pshufd r, r/a, #8
 894         case 0x71: // ps[rl|ra|ll]w r, #8
 895         case 0x72: // ps[rl|ra|ll]d r, #8
 896         case 0x73: // ps[rl|ra|ll]q r, #8
 897         case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
 898         case 0xC4: // pinsrw r, r, r/a, #8
 899         case 0xC5: // pextrw r/a, r, #8
 900         case 0xC6: // shufp[s|d] r, r, r/a, #8
 901           tail_size = 1;  // the imm8
 902           break;
 903         }
 904         break;
 905       case VEX_OPCODE_0F_3A:
 906         tail_size = 1;
 907         break;
 908     }
 909     ip++; // skip opcode
 910     debug_only(has_disp32 = true); // has both kinds of operands!
 911     break;
 912 
 913   case 0x62: // EVEX_4bytes
 914     assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
 915     assert(ip == inst+1, "no prefixes allowed");
 916     // no EVEX collisions, all instructions that have 0x62 opcodes
 917     // have EVEX versions and are subopcodes of 0x66
 918     ip++; // skip P0 and exmaine W in P1
 919     is_64bit = ((VEX_W & *ip) == VEX_W);
 920     ip++; // move to P2
 921     ip++; // skip P2, move to opcode
 922     // To find the end of instruction (which == end_pc_operand).
 923     switch (0xFF & *ip) {
 924     case 0x22: // pinsrd r, r/a, #8
 925     case 0x61: // pcmpestri r, r/a, #8
 926     case 0x70: // pshufd r, r/a, #8
 927     case 0x73: // psrldq r, #8
 928       tail_size = 1;  // the imm8
 929       break;
 930     default:
 931       break;
 932     }
 933     ip++; // skip opcode
 934     debug_only(has_disp32 = true); // has both kinds of operands!
 935     break;
 936 
 937   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 938   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 939   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 940   case 0xDD: // fld_d a; fst_d a; fstp_d a
 941   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 942   case 0xDF: // fild_d a; fistp_d a
 943   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 944   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 945   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 946     debug_only(has_disp32 = true);
 947     break;
 948 
 949   case 0xE8: // call rdisp32
 950   case 0xE9: // jmp  rdisp32
 951     if (which == end_pc_operand)  return ip + 4;
 952     assert(which == call32_operand, "call has no disp32 or imm");
 953     return ip;
 954 
 955   case 0xF0:                    // Lock
 956     assert(os::is_MP(), "only on MP");
 957     goto again_after_prefix;
 958 
 959   case 0xF3:                    // For SSE
 960   case 0xF2:                    // For SSE2
 961     switch (0xFF & *ip++) {
 962     case REX:
 963     case REX_B:
 964     case REX_X:
 965     case REX_XB:
 966     case REX_R:
 967     case REX_RB:
 968     case REX_RX:
 969     case REX_RXB:
 970     case REX_W:
 971     case REX_WB:
 972     case REX_WX:
 973     case REX_WXB:
 974     case REX_WR:
 975     case REX_WRB:
 976     case REX_WRX:
 977     case REX_WRXB:
 978       NOT_LP64(assert(false, "found 64bit prefix"));
 979       ip++;
 980     default:
 981       ip++;
 982     }
 983     debug_only(has_disp32 = true); // has both kinds of operands!
 984     break;
 985 
 986   default:
 987     ShouldNotReachHere();
 988 
 989 #undef REP8
 990 #undef REP16
 991   }
 992 
 993   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
 994 #ifdef _LP64
 995   assert(which != imm_operand, "instruction is not a movq reg, imm64");
 996 #else
 997   // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
 998   assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
 999 #endif // LP64
1000   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
1001 
1002   // parse the output of emit_operand
1003   int op2 = 0xFF & *ip++;
1004   int base = op2 & 0x07;
1005   int op3 = -1;
1006   const int b100 = 4;
1007   const int b101 = 5;
1008   if (base == b100 && (op2 >> 6) != 3) {
1009     op3 = 0xFF & *ip++;
1010     base = op3 & 0x07;   // refetch the base
1011   }
1012   // now ip points at the disp (if any)
1013 
1014   switch (op2 >> 6) {
1015   case 0:
1016     // [00 reg  100][ss index base]
1017     // [00 reg  100][00   100  esp]
1018     // [00 reg base]
1019     // [00 reg  100][ss index  101][disp32]
1020     // [00 reg  101]               [disp32]
1021 
1022     if (base == b101) {
1023       if (which == disp32_operand)
1024         return ip;              // caller wants the disp32
1025       ip += 4;                  // skip the disp32
1026     }
1027     break;
1028 
1029   case 1:
1030     // [01 reg  100][ss index base][disp8]
1031     // [01 reg  100][00   100  esp][disp8]
1032     // [01 reg base]               [disp8]
1033     ip += 1;                    // skip the disp8
1034     break;
1035 
1036   case 2:
1037     // [10 reg  100][ss index base][disp32]
1038     // [10 reg  100][00   100  esp][disp32]
1039     // [10 reg base]               [disp32]
1040     if (which == disp32_operand)
1041       return ip;                // caller wants the disp32
1042     ip += 4;                    // skip the disp32
1043     break;
1044 
1045   case 3:
1046     // [11 reg base]  (not a memory addressing mode)
1047     break;
1048   }
1049 
1050   if (which == end_pc_operand) {
1051     return ip + tail_size;
1052   }
1053 
1054 #ifdef _LP64
1055   assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
1056 #else
1057   assert(which == imm_operand, "instruction has only an imm field");
1058 #endif // LP64
1059   return ip;
1060 }
1061 
1062 address Assembler::locate_next_instruction(address inst) {
1063   // Secretly share code with locate_operand:
1064   return locate_operand(inst, end_pc_operand);
1065 }
1066 
1067 
1068 #ifdef ASSERT
1069 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
1070   address inst = inst_mark();
1071   assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
1072   address opnd;
1073 
1074   Relocation* r = rspec.reloc();
1075   if (r->type() == relocInfo::none) {
1076     return;
1077   } else if (r->is_call() || format == call32_operand) {
1078     // assert(format == imm32_operand, "cannot specify a nonzero format");
1079     opnd = locate_operand(inst, call32_operand);
1080   } else if (r->is_data()) {
1081     assert(format == imm_operand || format == disp32_operand
1082            LP64_ONLY(|| format == narrow_oop_operand), "format ok");
1083     opnd = locate_operand(inst, (WhichOperand)format);
1084   } else {
1085     assert(format == imm_operand, "cannot specify a format");
1086     return;
1087   }
1088   assert(opnd == pc(), "must put operand where relocs can find it");
1089 }
1090 #endif // ASSERT
1091 
1092 void Assembler::emit_operand32(Register reg, Address adr) {
1093   assert(reg->encoding() < 8, "no extended registers");
1094   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1095   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1096                adr._rspec);
1097 }
1098 
1099 void Assembler::emit_operand(Register reg, Address adr,
1100                              int rip_relative_correction) {
1101   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1102                adr._rspec,
1103                rip_relative_correction);
1104 }
1105 
1106 void Assembler::emit_operand(XMMRegister reg, Address adr) {
1107   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1108                adr._rspec);
1109 }
1110 
1111 // MMX operations
1112 void Assembler::emit_operand(MMXRegister reg, Address adr) {
1113   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1114   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
1115 }
1116 
1117 // work around gcc (3.2.1-7a) bug
1118 void Assembler::emit_operand(Address adr, MMXRegister reg) {
1119   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1120   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
1121 }
1122 
1123 
1124 void Assembler::emit_farith(int b1, int b2, int i) {
1125   assert(isByte(b1) && isByte(b2), "wrong opcode");
1126   assert(0 <= i &&  i < 8, "illegal stack offset");
1127   emit_int8(b1);
1128   emit_int8(b2 + i);
1129 }
1130 
1131 
1132 // Now the Assembler instructions (identical for 32/64 bits)
1133 
1134 void Assembler::adcl(Address dst, int32_t imm32) {
1135   InstructionMark im(this);
1136   prefix(dst);
1137   emit_arith_operand(0x81, rdx, dst, imm32);
1138 }
1139 
1140 void Assembler::adcl(Address dst, Register src) {
1141   InstructionMark im(this);
1142   prefix(dst, src);
1143   emit_int8(0x11);
1144   emit_operand(src, dst);
1145 }
1146 
1147 void Assembler::adcl(Register dst, int32_t imm32) {
1148   prefix(dst);
1149   emit_arith(0x81, 0xD0, dst, imm32);
1150 }
1151 
1152 void Assembler::adcl(Register dst, Address src) {
1153   InstructionMark im(this);
1154   prefix(src, dst);
1155   emit_int8(0x13);
1156   emit_operand(dst, src);
1157 }
1158 
1159 void Assembler::adcl(Register dst, Register src) {
1160   (void) prefix_and_encode(dst->encoding(), src->encoding());
1161   emit_arith(0x13, 0xC0, dst, src);
1162 }
1163 
1164 void Assembler::addl(Address dst, int32_t imm32) {
1165   InstructionMark im(this);
1166   prefix(dst);
1167   emit_arith_operand(0x81, rax, dst, imm32);
1168 }
1169 
1170 void Assembler::addb(Address dst, int imm8) {
1171   InstructionMark im(this);
1172   prefix(dst);
1173   emit_int8((unsigned char)0x80);
1174   emit_operand(rax, dst, 1);
1175   emit_int8(imm8);
1176 }
1177 
1178 void Assembler::addw(Address dst, int imm16) {
1179   InstructionMark im(this);
1180   emit_int8(0x66);
1181   prefix(dst);
1182   emit_int8((unsigned char)0x81);
1183   emit_operand(rax, dst, 2);
1184   emit_int16(imm16);
1185 }
1186 
1187 void Assembler::addl(Address dst, Register src) {
1188   InstructionMark im(this);
1189   prefix(dst, src);
1190   emit_int8(0x01);
1191   emit_operand(src, dst);
1192 }
1193 
1194 void Assembler::addl(Register dst, int32_t imm32) {
1195   prefix(dst);
1196   emit_arith(0x81, 0xC0, dst, imm32);
1197 }
1198 
1199 void Assembler::addl(Register dst, Address src) {
1200   InstructionMark im(this);
1201   prefix(src, dst);
1202   emit_int8(0x03);
1203   emit_operand(dst, src);
1204 }
1205 
1206 void Assembler::addl(Register dst, Register src) {
1207   (void) prefix_and_encode(dst->encoding(), src->encoding());
1208   emit_arith(0x03, 0xC0, dst, src);
1209 }
1210 
1211 void Assembler::addr_nop_4() {
1212   assert(UseAddressNop, "no CPU support");
1213   // 4 bytes: NOP DWORD PTR [EAX+0]
1214   emit_int8(0x0F);
1215   emit_int8(0x1F);
1216   emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
1217   emit_int8(0);    // 8-bits offset (1 byte)
1218 }
1219 
1220 void Assembler::addr_nop_5() {
1221   assert(UseAddressNop, "no CPU support");
1222   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
1223   emit_int8(0x0F);
1224   emit_int8(0x1F);
1225   emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
1226   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1227   emit_int8(0);    // 8-bits offset (1 byte)
1228 }
1229 
1230 void Assembler::addr_nop_7() {
1231   assert(UseAddressNop, "no CPU support");
1232   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
1233   emit_int8(0x0F);
1234   emit_int8(0x1F);
1235   emit_int8((unsigned char)0x80);
1236                    // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
1237   emit_int32(0);   // 32-bits offset (4 bytes)
1238 }
1239 
1240 void Assembler::addr_nop_8() {
1241   assert(UseAddressNop, "no CPU support");
1242   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
1243   emit_int8(0x0F);
1244   emit_int8(0x1F);
1245   emit_int8((unsigned char)0x84);
1246                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
1247   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1248   emit_int32(0);   // 32-bits offset (4 bytes)
1249 }
1250 
1251 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
1252   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1253   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1254   attributes.set_rex_vex_w_reverted();
1255   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1256   emit_int8(0x58);
1257   emit_int8((unsigned char)(0xC0 | encode));
1258 }
1259 
1260 void Assembler::addsd(XMMRegister dst, Address src) {
1261   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1262   InstructionMark im(this);
1263   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1264   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1265   attributes.set_rex_vex_w_reverted();
1266   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1267   emit_int8(0x58);
1268   emit_operand(dst, src);
1269 }
1270 
1271 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1272   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1273   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1274   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1275   emit_int8(0x58);
1276   emit_int8((unsigned char)(0xC0 | encode));
1277 }
1278 
1279 void Assembler::addss(XMMRegister dst, Address src) {
1280   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1281   InstructionMark im(this);
1282   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1283   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1284   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1285   emit_int8(0x58);
1286   emit_operand(dst, src);
1287 }
1288 
1289 void Assembler::aesdec(XMMRegister dst, Address src) {
1290   assert(VM_Version::supports_aes(), "");
1291   InstructionMark im(this);
1292   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1293   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1294   emit_int8((unsigned char)0xDE);
1295   emit_operand(dst, src);
1296 }
1297 
1298 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1299   assert(VM_Version::supports_aes(), "");
1300   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1301   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1302   emit_int8((unsigned char)0xDE);
1303   emit_int8(0xC0 | encode);
1304 }
1305 
1306 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1307   assert(VM_Version::supports_aes(), "");
1308   InstructionMark im(this);
1309   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1310   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1311   emit_int8((unsigned char)0xDF);
1312   emit_operand(dst, src);
1313 }
1314 
1315 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1316   assert(VM_Version::supports_aes(), "");
1317   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1318   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1319   emit_int8((unsigned char)0xDF);
1320   emit_int8((unsigned char)(0xC0 | encode));
1321 }
1322 
1323 void Assembler::aesenc(XMMRegister dst, Address src) {
1324   assert(VM_Version::supports_aes(), "");
1325   InstructionMark im(this);
1326   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1327   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1328   emit_int8((unsigned char)0xDC);
1329   emit_operand(dst, src);
1330 }
1331 
1332 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1333   assert(VM_Version::supports_aes(), "");
1334   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1335   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1336   emit_int8((unsigned char)0xDC);
1337   emit_int8(0xC0 | encode);
1338 }
1339 
1340 void Assembler::aesenclast(XMMRegister dst, Address src) {
1341   assert(VM_Version::supports_aes(), "");
1342   InstructionMark im(this);
1343   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1344   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1345   emit_int8((unsigned char)0xDD);
1346   emit_operand(dst, src);
1347 }
1348 
1349 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1350   assert(VM_Version::supports_aes(), "");
1351   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1352   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1353   emit_int8((unsigned char)0xDD);
1354   emit_int8((unsigned char)(0xC0 | encode));
1355 }
1356 
1357 void Assembler::andl(Address dst, int32_t imm32) {
1358   InstructionMark im(this);
1359   prefix(dst);
1360   emit_int8((unsigned char)0x81);
1361   emit_operand(rsp, dst, 4);
1362   emit_int32(imm32);
1363 }
1364 
1365 void Assembler::andl(Register dst, int32_t imm32) {
1366   prefix(dst);
1367   emit_arith(0x81, 0xE0, dst, imm32);
1368 }
1369 
1370 void Assembler::andl(Register dst, Address src) {
1371   InstructionMark im(this);
1372   prefix(src, dst);
1373   emit_int8(0x23);
1374   emit_operand(dst, src);
1375 }
1376 
1377 void Assembler::andl(Register dst, Register src) {
1378   (void) prefix_and_encode(dst->encoding(), src->encoding());
1379   emit_arith(0x23, 0xC0, dst, src);
1380 }
1381 
1382 void Assembler::andnl(Register dst, Register src1, Register src2) {
1383   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1384   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1385   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1386   emit_int8((unsigned char)0xF2);
1387   emit_int8((unsigned char)(0xC0 | encode));
1388 }
1389 
1390 void Assembler::andnl(Register dst, Register src1, Address src2) {
1391   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1392   InstructionMark im(this);
1393   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1394   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1395   emit_int8((unsigned char)0xF2);
1396   emit_operand(dst, src2);
1397 }
1398 
1399 void Assembler::bsfl(Register dst, Register src) {
1400   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1401   emit_int8(0x0F);
1402   emit_int8((unsigned char)0xBC);
1403   emit_int8((unsigned char)(0xC0 | encode));
1404 }
1405 
1406 void Assembler::bsrl(Register dst, Register src) {
1407   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1408   emit_int8(0x0F);
1409   emit_int8((unsigned char)0xBD);
1410   emit_int8((unsigned char)(0xC0 | encode));
1411 }
1412 
1413 void Assembler::bswapl(Register reg) { // bswap
1414   int encode = prefix_and_encode(reg->encoding());
1415   emit_int8(0x0F);
1416   emit_int8((unsigned char)(0xC8 | encode));
1417 }
1418 
1419 void Assembler::blsil(Register dst, Register src) {
1420   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1421   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1422   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1423   emit_int8((unsigned char)0xF3);
1424   emit_int8((unsigned char)(0xC0 | encode));
1425 }
1426 
1427 void Assembler::blsil(Register dst, Address src) {
1428   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1429   InstructionMark im(this);
1430   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1431   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1432   emit_int8((unsigned char)0xF3);
1433   emit_operand(rbx, src);
1434 }
1435 
1436 void Assembler::blsmskl(Register dst, Register src) {
1437   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1438   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1439   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1440   emit_int8((unsigned char)0xF3);
1441   emit_int8((unsigned char)(0xC0 | encode));
1442 }
1443 
1444 void Assembler::blsmskl(Register dst, Address src) {
1445   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1446   InstructionMark im(this);
1447   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1448   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1449   emit_int8((unsigned char)0xF3);
1450   emit_operand(rdx, src);
1451 }
1452 
1453 void Assembler::blsrl(Register dst, Register src) {
1454   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1455   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1456   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1457   emit_int8((unsigned char)0xF3);
1458   emit_int8((unsigned char)(0xC0 | encode));
1459 }
1460 
1461 void Assembler::blsrl(Register dst, Address src) {
1462   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1463   InstructionMark im(this);
1464   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1465   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1466   emit_int8((unsigned char)0xF3);
1467   emit_operand(rcx, src);
1468 }
1469 
1470 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1471   // suspect disp32 is always good
1472   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1473 
1474   if (L.is_bound()) {
1475     const int long_size = 5;
1476     int offs = (int)( target(L) - pc() );
1477     assert(offs <= 0, "assembler error");
1478     InstructionMark im(this);
1479     // 1110 1000 #32-bit disp
1480     emit_int8((unsigned char)0xE8);
1481     emit_data(offs - long_size, rtype, operand);
1482   } else {
1483     InstructionMark im(this);
1484     // 1110 1000 #32-bit disp
1485     L.add_patch_at(code(), locator());
1486 
1487     emit_int8((unsigned char)0xE8);
1488     emit_data(int(0), rtype, operand);
1489   }
1490 }
1491 
1492 void Assembler::call(Register dst) {
1493   int encode = prefix_and_encode(dst->encoding());
1494   emit_int8((unsigned char)0xFF);
1495   emit_int8((unsigned char)(0xD0 | encode));
1496 }
1497 
1498 
1499 void Assembler::call(Address adr) {
1500   InstructionMark im(this);
1501   prefix(adr);
1502   emit_int8((unsigned char)0xFF);
1503   emit_operand(rdx, adr);
1504 }
1505 
1506 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1507   InstructionMark im(this);
1508   emit_int8((unsigned char)0xE8);
1509   intptr_t disp = entry - (pc() + sizeof(int32_t));
1510   // Entry is NULL in case of a scratch emit.
1511   assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
1512   // Technically, should use call32_operand, but this format is
1513   // implied by the fact that we're emitting a call instruction.
1514 
1515   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1516   emit_data((int) disp, rspec, operand);
1517 }
1518 
1519 void Assembler::cdql() {
1520   emit_int8((unsigned char)0x99);
1521 }
1522 
1523 void Assembler::cld() {
1524   emit_int8((unsigned char)0xFC);
1525 }
1526 
1527 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1528   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1529   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1530   emit_int8(0x0F);
1531   emit_int8(0x40 | cc);
1532   emit_int8((unsigned char)(0xC0 | encode));
1533 }
1534 
1535 
1536 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1537   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1538   prefix(src, dst);
1539   emit_int8(0x0F);
1540   emit_int8(0x40 | cc);
1541   emit_operand(dst, src);
1542 }
1543 
1544 void Assembler::cmpb(Address dst, int imm8) {
1545   InstructionMark im(this);
1546   prefix(dst);
1547   emit_int8((unsigned char)0x80);
1548   emit_operand(rdi, dst, 1);
1549   emit_int8(imm8);
1550 }
1551 
1552 void Assembler::cmpl(Address dst, int32_t imm32) {
1553   InstructionMark im(this);
1554   prefix(dst);
1555   emit_int8((unsigned char)0x81);
1556   emit_operand(rdi, dst, 4);
1557   emit_int32(imm32);
1558 }
1559 
1560 void Assembler::cmpl(Register dst, int32_t imm32) {
1561   prefix(dst);
1562   emit_arith(0x81, 0xF8, dst, imm32);
1563 }
1564 
1565 void Assembler::cmpl(Register dst, Register src) {
1566   (void) prefix_and_encode(dst->encoding(), src->encoding());
1567   emit_arith(0x3B, 0xC0, dst, src);
1568 }
1569 
1570 void Assembler::cmpl(Register dst, Address  src) {
1571   InstructionMark im(this);
1572   prefix(src, dst);
1573   emit_int8((unsigned char)0x3B);
1574   emit_operand(dst, src);
1575 }
1576 
1577 void Assembler::cmpw(Address dst, int imm16) {
1578   InstructionMark im(this);
1579   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1580   emit_int8(0x66);
1581   emit_int8((unsigned char)0x81);
1582   emit_operand(rdi, dst, 2);
1583   emit_int16(imm16);
1584 }
1585 
1586 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
1587 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1588 // The ZF is set if the compared values were equal, and cleared otherwise.
1589 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1590   InstructionMark im(this);
1591   prefix(adr, reg);
1592   emit_int8(0x0F);
1593   emit_int8((unsigned char)0xB1);
1594   emit_operand(reg, adr);
1595 }
1596 
1597 // The 8-bit cmpxchg compares the value at adr with the contents of rax,
1598 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1599 // The ZF is set if the compared values were equal, and cleared otherwise.
1600 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1601   InstructionMark im(this);
1602   prefix(adr, reg, true);
1603   emit_int8(0x0F);
1604   emit_int8((unsigned char)0xB0);
1605   emit_operand(reg, adr);
1606 }
1607 
1608 void Assembler::comisd(XMMRegister dst, Address src) {
1609   // NOTE: dbx seems to decode this as comiss even though the
1610   // 0x66 is there. Strangly ucomisd comes out correct
1611   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1612   InstructionMark im(this);
1613   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);;
1614   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1615   attributes.set_rex_vex_w_reverted();
1616   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1617   emit_int8(0x2F);
1618   emit_operand(dst, src);
1619 }
1620 
1621 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1622   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1623   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1624   attributes.set_rex_vex_w_reverted();
1625   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1626   emit_int8(0x2F);
1627   emit_int8((unsigned char)(0xC0 | encode));
1628 }
1629 
1630 void Assembler::comiss(XMMRegister dst, Address src) {
1631   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1632   InstructionMark im(this);
1633   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1634   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1635   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1636   emit_int8(0x2F);
1637   emit_operand(dst, src);
1638 }
1639 
1640 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1641   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1642   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1643   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1644   emit_int8(0x2F);
1645   emit_int8((unsigned char)(0xC0 | encode));
1646 }
1647 
1648 void Assembler::cpuid() {
1649   emit_int8(0x0F);
1650   emit_int8((unsigned char)0xA2);
1651 }
1652 
1653 // Opcode / Instruction                      Op /  En  64 - Bit Mode     Compat / Leg Mode Description                  Implemented
1654 // F2 0F 38 F0 / r       CRC32 r32, r / m8   RM        Valid             Valid             Accumulate CRC32 on r / m8.  v
1655 // F2 REX 0F 38 F0 / r   CRC32 r32, r / m8*  RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1656 // F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8   RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1657 //
1658 // F2 0F 38 F1 / r       CRC32 r32, r / m16  RM        Valid             Valid             Accumulate CRC32 on r / m16. v
1659 //
1660 // F2 0F 38 F1 / r       CRC32 r32, r / m32  RM        Valid             Valid             Accumulate CRC32 on r / m32. v
1661 //
1662 // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64  RM        Valid             N.E.              Accumulate CRC32 on r / m64. v
1663 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1664   assert(VM_Version::supports_sse4_2(), "");
1665   int8_t w = 0x01;
1666   Prefix p = Prefix_EMPTY;
1667 
1668   emit_int8((int8_t)0xF2);
1669   switch (sizeInBytes) {
1670   case 1:
1671     w = 0;
1672     break;
1673   case 2:
1674   case 4:
1675     break;
1676   LP64_ONLY(case 8:)
1677     // This instruction is not valid in 32 bits
1678     // Note:
1679     // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1680     //
1681     // Page B - 72   Vol. 2C says
1682     // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
1683     // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
1684     //                                                                            F0!!!
1685     // while 3 - 208 Vol. 2A
1686     // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.Accumulate CRC32 on r / m64.
1687     //
1688     // the 0 on a last bit is reserved for a different flavor of this instruction :
1689     // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.Accumulate CRC32 on r / m8.
1690     p = REX_W;
1691     break;
1692   default:
1693     assert(0, "Unsupported value for a sizeInBytes argument");
1694     break;
1695   }
1696   LP64_ONLY(prefix(crc, v, p);)
1697   emit_int8((int8_t)0x0F);
1698   emit_int8(0x38);
1699   emit_int8((int8_t)(0xF0 | w));
1700   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1701 }
1702 
1703 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1704   assert(VM_Version::supports_sse4_2(), "");
1705   InstructionMark im(this);
1706   int8_t w = 0x01;
1707   Prefix p = Prefix_EMPTY;
1708 
1709   emit_int8((int8_t)0xF2);
1710   switch (sizeInBytes) {
1711   case 1:
1712     w = 0;
1713     break;
1714   case 2:
1715   case 4:
1716     break;
1717   LP64_ONLY(case 8:)
1718     // This instruction is not valid in 32 bits
1719     p = REX_W;
1720     break;
1721   default:
1722     assert(0, "Unsupported value for a sizeInBytes argument");
1723     break;
1724   }
1725   LP64_ONLY(prefix(crc, adr, p);)
1726   emit_int8((int8_t)0x0F);
1727   emit_int8(0x38);
1728   emit_int8((int8_t)(0xF0 | w));
1729   emit_operand(crc, adr);
1730 }
1731 
1732 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1733   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1734   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1735   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1736   emit_int8((unsigned char)0xE6);
1737   emit_int8((unsigned char)(0xC0 | encode));
1738 }
1739 
1740 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1741   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1742   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1743   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1744   emit_int8(0x5B);
1745   emit_int8((unsigned char)(0xC0 | encode));
1746 }
1747 
1748 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1749   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1750   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1751   attributes.set_rex_vex_w_reverted();
1752   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1753   emit_int8(0x5A);
1754   emit_int8((unsigned char)(0xC0 | encode));
1755 }
1756 
1757 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1758   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1759   InstructionMark im(this);
1760   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1761   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1762   attributes.set_rex_vex_w_reverted();
1763   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1764   emit_int8(0x5A);
1765   emit_operand(dst, src);
1766 }
1767 
1768 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1769   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1770   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1771   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1772   emit_int8(0x2A);
1773   emit_int8((unsigned char)(0xC0 | encode));
1774 }
1775 
1776 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1777   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1778   InstructionMark im(this);
1779   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1780   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1781   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1782   emit_int8(0x2A);
1783   emit_operand(dst, src);
1784 }
1785 
1786 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1787   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1788   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1789   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1790   emit_int8(0x2A);
1791   emit_int8((unsigned char)(0xC0 | encode));
1792 }
1793 
1794 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1795   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1796   InstructionMark im(this);
1797   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1798   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1799   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1800   emit_int8(0x2A);
1801   emit_operand(dst, src);
1802 }
1803 
1804 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1805   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1806   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1807   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1808   emit_int8(0x2A);
1809   emit_int8((unsigned char)(0xC0 | encode));
1810 }
1811 
1812 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1813   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1814   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1815   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1816   emit_int8(0x5A);
1817   emit_int8((unsigned char)(0xC0 | encode));
1818 }
1819 
1820 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1821   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1822   InstructionMark im(this);
1823   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1824   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1825   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1826   emit_int8(0x5A);
1827   emit_operand(dst, src);
1828 }
1829 
1830 
1831 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1832   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1833   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1834   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1835   emit_int8(0x2C);
1836   emit_int8((unsigned char)(0xC0 | encode));
1837 }
1838 
1839 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1840   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1841   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1842   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1843   emit_int8(0x2C);
1844   emit_int8((unsigned char)(0xC0 | encode));
1845 }
1846 
1847 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1848   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1849   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1850   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1851   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1852   emit_int8((unsigned char)0xE6);
1853   emit_int8((unsigned char)(0xC0 | encode));
1854 }
1855 
1856 void Assembler::decl(Address dst) {
1857   // Don't use it directly. Use MacroAssembler::decrement() instead.
1858   InstructionMark im(this);
1859   prefix(dst);
1860   emit_int8((unsigned char)0xFF);
1861   emit_operand(rcx, dst);
1862 }
1863 
1864 void Assembler::divsd(XMMRegister dst, Address src) {
1865   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1866   InstructionMark im(this);
1867   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1868   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1869   attributes.set_rex_vex_w_reverted();
1870   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1871   emit_int8(0x5E);
1872   emit_operand(dst, src);
1873 }
1874 
1875 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1876   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1877   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1878   attributes.set_rex_vex_w_reverted();
1879   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1880   emit_int8(0x5E);
1881   emit_int8((unsigned char)(0xC0 | encode));
1882 }
1883 
1884 void Assembler::divss(XMMRegister dst, Address src) {
1885   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1886   InstructionMark im(this);
1887   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1888   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1889   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1890   emit_int8(0x5E);
1891   emit_operand(dst, src);
1892 }
1893 
1894 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1895   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1896   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1897   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1898   emit_int8(0x5E);
1899   emit_int8((unsigned char)(0xC0 | encode));
1900 }
1901 
1902 void Assembler::emms() {
1903   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1904   emit_int8(0x0F);
1905   emit_int8(0x77);
1906 }
1907 
1908 void Assembler::hlt() {
1909   emit_int8((unsigned char)0xF4);
1910 }
1911 
1912 void Assembler::idivl(Register src) {
1913   int encode = prefix_and_encode(src->encoding());
1914   emit_int8((unsigned char)0xF7);
1915   emit_int8((unsigned char)(0xF8 | encode));
1916 }
1917 
1918 void Assembler::divl(Register src) { // Unsigned
1919   int encode = prefix_and_encode(src->encoding());
1920   emit_int8((unsigned char)0xF7);
1921   emit_int8((unsigned char)(0xF0 | encode));
1922 }
1923 
1924 void Assembler::imull(Register src) {
1925   int encode = prefix_and_encode(src->encoding());
1926   emit_int8((unsigned char)0xF7);
1927   emit_int8((unsigned char)(0xE8 | encode));
1928 }
1929 
1930 void Assembler::imull(Register dst, Register src) {
1931   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1932   emit_int8(0x0F);
1933   emit_int8((unsigned char)0xAF);
1934   emit_int8((unsigned char)(0xC0 | encode));
1935 }
1936 
1937 
1938 void Assembler::imull(Register dst, Register src, int value) {
1939   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1940   if (is8bit(value)) {
1941     emit_int8(0x6B);
1942     emit_int8((unsigned char)(0xC0 | encode));
1943     emit_int8(value & 0xFF);
1944   } else {
1945     emit_int8(0x69);
1946     emit_int8((unsigned char)(0xC0 | encode));
1947     emit_int32(value);
1948   }
1949 }
1950 
1951 void Assembler::imull(Register dst, Address src) {
1952   InstructionMark im(this);
1953   prefix(src, dst);
1954   emit_int8(0x0F);
1955   emit_int8((unsigned char) 0xAF);
1956   emit_operand(dst, src);
1957 }
1958 
1959 
1960 void Assembler::incl(Address dst) {
1961   // Don't use it directly. Use MacroAssembler::increment() instead.
1962   InstructionMark im(this);
1963   prefix(dst);
1964   emit_int8((unsigned char)0xFF);
1965   emit_operand(rax, dst);
1966 }
1967 
1968 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1969   InstructionMark im(this);
1970   assert((0 <= cc) && (cc < 16), "illegal cc");
1971   if (L.is_bound()) {
1972     address dst = target(L);
1973     assert(dst != NULL, "jcc most probably wrong");
1974 
1975     const int short_size = 2;
1976     const int long_size = 6;
1977     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1978     if (maybe_short && is8bit(offs - short_size)) {
1979       // 0111 tttn #8-bit disp
1980       emit_int8(0x70 | cc);
1981       emit_int8((offs - short_size) & 0xFF);
1982     } else {
1983       // 0000 1111 1000 tttn #32-bit disp
1984       assert(is_simm32(offs - long_size),
1985              "must be 32bit offset (call4)");
1986       emit_int8(0x0F);
1987       emit_int8((unsigned char)(0x80 | cc));
1988       emit_int32(offs - long_size);
1989     }
1990   } else {
1991     // Note: could eliminate cond. jumps to this jump if condition
1992     //       is the same however, seems to be rather unlikely case.
1993     // Note: use jccb() if label to be bound is very close to get
1994     //       an 8-bit displacement
1995     L.add_patch_at(code(), locator());
1996     emit_int8(0x0F);
1997     emit_int8((unsigned char)(0x80 | cc));
1998     emit_int32(0);
1999   }
2000 }
2001 
2002 void Assembler::jccb(Condition cc, Label& L) {
2003   if (L.is_bound()) {
2004     const int short_size = 2;
2005     address entry = target(L);
2006 #ifdef ASSERT
2007     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2008     intptr_t delta = short_branch_delta();
2009     if (delta != 0) {
2010       dist += (dist < 0 ? (-delta) :delta);
2011     }
2012     assert(is8bit(dist), "Dispacement too large for a short jmp");
2013 #endif
2014     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2015     // 0111 tttn #8-bit disp
2016     emit_int8(0x70 | cc);
2017     emit_int8((offs - short_size) & 0xFF);
2018   } else {
2019     InstructionMark im(this);
2020     L.add_patch_at(code(), locator());
2021     emit_int8(0x70 | cc);
2022     emit_int8(0);
2023   }
2024 }
2025 
2026 void Assembler::jmp(Address adr) {
2027   InstructionMark im(this);
2028   prefix(adr);
2029   emit_int8((unsigned char)0xFF);
2030   emit_operand(rsp, adr);
2031 }
2032 
2033 void Assembler::jmp(Label& L, bool maybe_short) {
2034   if (L.is_bound()) {
2035     address entry = target(L);
2036     assert(entry != NULL, "jmp most probably wrong");
2037     InstructionMark im(this);
2038     const int short_size = 2;
2039     const int long_size = 5;
2040     intptr_t offs = entry - pc();
2041     if (maybe_short && is8bit(offs - short_size)) {
2042       emit_int8((unsigned char)0xEB);
2043       emit_int8((offs - short_size) & 0xFF);
2044     } else {
2045       emit_int8((unsigned char)0xE9);
2046       emit_int32(offs - long_size);
2047     }
2048   } else {
2049     // By default, forward jumps are always 32-bit displacements, since
2050     // we can't yet know where the label will be bound.  If you're sure that
2051     // the forward jump will not run beyond 256 bytes, use jmpb to
2052     // force an 8-bit displacement.
2053     InstructionMark im(this);
2054     L.add_patch_at(code(), locator());
2055     emit_int8((unsigned char)0xE9);
2056     emit_int32(0);
2057   }
2058 }
2059 
2060 void Assembler::jmp(Register entry) {
2061   int encode = prefix_and_encode(entry->encoding());
2062   emit_int8((unsigned char)0xFF);
2063   emit_int8((unsigned char)(0xE0 | encode));
2064 }
2065 
2066 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2067   InstructionMark im(this);
2068   emit_int8((unsigned char)0xE9);
2069   assert(dest != NULL, "must have a target");
2070   intptr_t disp = dest - (pc() + sizeof(int32_t));
2071   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2072   emit_data(disp, rspec.reloc(), call32_operand);
2073 }
2074 
2075 void Assembler::jmpb(Label& L) {
2076   if (L.is_bound()) {
2077     const int short_size = 2;
2078     address entry = target(L);
2079     assert(entry != NULL, "jmp most probably wrong");
2080 #ifdef ASSERT
2081     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2082     intptr_t delta = short_branch_delta();
2083     if (delta != 0) {
2084       dist += (dist < 0 ? (-delta) :delta);
2085     }
2086     assert(is8bit(dist), "Dispacement too large for a short jmp");
2087 #endif
2088     intptr_t offs = entry - pc();
2089     emit_int8((unsigned char)0xEB);
2090     emit_int8((offs - short_size) & 0xFF);
2091   } else {
2092     InstructionMark im(this);
2093     L.add_patch_at(code(), locator());
2094     emit_int8((unsigned char)0xEB);
2095     emit_int8(0);
2096   }
2097 }
2098 
2099 void Assembler::ldmxcsr( Address src) {
2100   if (UseAVX > 0 ) {
2101     InstructionMark im(this);
2102     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2103     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2104     emit_int8((unsigned char)0xAE);
2105     emit_operand(as_Register(2), src);
2106   } else {
2107     NOT_LP64(assert(VM_Version::supports_sse(), ""));
2108     InstructionMark im(this);
2109     prefix(src);
2110     emit_int8(0x0F);
2111     emit_int8((unsigned char)0xAE);
2112     emit_operand(as_Register(2), src);
2113   }
2114 }
2115 
2116 void Assembler::leal(Register dst, Address src) {
2117   InstructionMark im(this);
2118 #ifdef _LP64
2119   emit_int8(0x67); // addr32
2120   prefix(src, dst);
2121 #endif // LP64
2122   emit_int8((unsigned char)0x8D);
2123   emit_operand(dst, src);
2124 }
2125 
2126 void Assembler::lfence() {
2127   emit_int8(0x0F);
2128   emit_int8((unsigned char)0xAE);
2129   emit_int8((unsigned char)0xE8);
2130 }
2131 
2132 void Assembler::lock() {
2133   emit_int8((unsigned char)0xF0);
2134 }
2135 
2136 void Assembler::lzcntl(Register dst, Register src) {
2137   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2138   emit_int8((unsigned char)0xF3);
2139   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2140   emit_int8(0x0F);
2141   emit_int8((unsigned char)0xBD);
2142   emit_int8((unsigned char)(0xC0 | encode));
2143 }
2144 
2145 // Emit mfence instruction
2146 void Assembler::mfence() {
2147   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2148   emit_int8(0x0F);
2149   emit_int8((unsigned char)0xAE);
2150   emit_int8((unsigned char)0xF0);
2151 }
2152 
2153 void Assembler::mov(Register dst, Register src) {
2154   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2155 }
2156 
2157 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2158   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2159   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2160   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2161   attributes.set_rex_vex_w_reverted();
2162   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2163   emit_int8(0x28);
2164   emit_int8((unsigned char)(0xC0 | encode));
2165 }
2166 
2167 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2168   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2169   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2170   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2171   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2172   emit_int8(0x28);
2173   emit_int8((unsigned char)(0xC0 | encode));
2174 }
2175 
2176 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2177   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2178   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2179   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2180   emit_int8(0x16);
2181   emit_int8((unsigned char)(0xC0 | encode));
2182 }
2183 
2184 void Assembler::movb(Register dst, Address src) {
2185   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2186   InstructionMark im(this);
2187   prefix(src, dst, true);
2188   emit_int8((unsigned char)0x8A);
2189   emit_operand(dst, src);
2190 }
2191 
2192 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2193   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2194   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2195   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2196   attributes.set_rex_vex_w_reverted();
2197   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2198   emit_int8(0x12);
2199   emit_int8(0xC0 | encode);
2200 }
2201 
2202 void Assembler::kmovbl(KRegister dst, Register src) {
2203   assert(VM_Version::supports_avx512dq(), "");
2204   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2205   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2206   emit_int8((unsigned char)0x92);
2207   emit_int8((unsigned char)(0xC0 | encode));
2208 }
2209 
2210 void Assembler::kmovbl(Register dst, KRegister src) {
2211   assert(VM_Version::supports_avx512dq(), "");
2212   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2213   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2214   emit_int8((unsigned char)0x93);
2215   emit_int8((unsigned char)(0xC0 | encode));
2216 }
2217 
2218 void Assembler::kmovwl(KRegister dst, Register src) {
2219   assert(VM_Version::supports_evex(), "");
2220   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2221   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2222   emit_int8((unsigned char)0x92);
2223   emit_int8((unsigned char)(0xC0 | encode));
2224 }
2225 
2226 void Assembler::kmovwl(Register dst, KRegister src) {
2227   assert(VM_Version::supports_evex(), "");
2228   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2229   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2230   emit_int8((unsigned char)0x93);
2231   emit_int8((unsigned char)(0xC0 | encode));
2232 }
2233 
2234 void Assembler::kmovwl(KRegister dst, Address src) {
2235   assert(VM_Version::supports_evex(), "");
2236   InstructionMark im(this);
2237   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2238   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2239   emit_int8((unsigned char)0x90);
2240   emit_operand((Register)dst, src);
2241 }
2242 
2243 void Assembler::kmovdl(KRegister dst, Register src) {
2244   assert(VM_Version::supports_avx512bw(), "");
2245   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2246   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2247   emit_int8((unsigned char)0x92);
2248   emit_int8((unsigned char)(0xC0 | encode));
2249 }
2250 
2251 void Assembler::kmovdl(Register dst, KRegister src) {
2252   assert(VM_Version::supports_avx512bw(), "");
2253   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2254   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2255   emit_int8((unsigned char)0x93);
2256   emit_int8((unsigned char)(0xC0 | encode));
2257 }
2258 
2259 void Assembler::kmovql(KRegister dst, KRegister src) {
2260   assert(VM_Version::supports_avx512bw(), "");
2261   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2262   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2263   emit_int8((unsigned char)0x90);
2264   emit_int8((unsigned char)(0xC0 | encode));
2265 }
2266 
2267 void Assembler::kmovql(KRegister dst, Address src) {
2268   assert(VM_Version::supports_avx512bw(), "");
2269   InstructionMark im(this);
2270   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2271   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2272   emit_int8((unsigned char)0x90);
2273   emit_operand((Register)dst, src);
2274 }
2275 
2276 void Assembler::kmovql(Address dst, KRegister src) {
2277   assert(VM_Version::supports_avx512bw(), "");
2278   InstructionMark im(this);
2279   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2280   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2281   emit_int8((unsigned char)0x90);
2282   emit_operand((Register)src, dst);
2283 }
2284 
2285 void Assembler::kmovql(KRegister dst, Register src) {
2286   assert(VM_Version::supports_avx512bw(), "");
2287   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2288   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2289   emit_int8((unsigned char)0x92);
2290   emit_int8((unsigned char)(0xC0 | encode));
2291 }
2292 
2293 void Assembler::kmovql(Register dst, KRegister src) {
2294   assert(VM_Version::supports_avx512bw(), "");
2295   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2296   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2297   emit_int8((unsigned char)0x93);
2298   emit_int8((unsigned char)(0xC0 | encode));
2299 }
2300 
2301 void Assembler::knotwl(KRegister dst, KRegister src) {
2302   assert(VM_Version::supports_evex(), "");
2303   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2304   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2305   emit_int8((unsigned char)0x44);
2306   emit_int8((unsigned char)(0xC0 | encode));
2307 }
2308 
2309 // This instruction produces ZF or CF flags
2310 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2311   assert(VM_Version::supports_avx512dq(), "");
2312   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2313   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2314   emit_int8((unsigned char)0x98);
2315   emit_int8((unsigned char)(0xC0 | encode));
2316 }
2317 
2318 // This instruction produces ZF or CF flags
2319 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2320   assert(VM_Version::supports_evex(), "");
2321   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2322   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2323   emit_int8((unsigned char)0x98);
2324   emit_int8((unsigned char)(0xC0 | encode));
2325 }
2326 
2327 // This instruction produces ZF or CF flags
2328 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2329   assert(VM_Version::supports_avx512bw(), "");
2330   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2331   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2332   emit_int8((unsigned char)0x98);
2333   emit_int8((unsigned char)(0xC0 | encode));
2334 }
2335 
2336 // This instruction produces ZF or CF flags
2337 void Assembler::kortestql(KRegister src1, KRegister src2) {
2338   assert(VM_Version::supports_avx512bw(), "");
2339   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2340   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2341   emit_int8((unsigned char)0x98);
2342   emit_int8((unsigned char)(0xC0 | encode));
2343 }
2344 
2345 // This instruction produces ZF or CF flags
2346 void Assembler::ktestql(KRegister src1, KRegister src2) {
2347   assert(VM_Version::supports_avx512bw(), "");
2348   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2349   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2350   emit_int8((unsigned char)0x99);
2351   emit_int8((unsigned char)(0xC0 | encode));
2352 }
2353 
2354 void Assembler::ktestq(KRegister src1, KRegister src2) {
2355   assert(VM_Version::supports_avx512bw(), "");
2356   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2357   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2358   emit_int8((unsigned char)0x99);
2359   emit_int8((unsigned char)(0xC0 | encode));
2360 }
2361 
2362 void Assembler::ktestd(KRegister src1, KRegister src2) {
2363   assert(VM_Version::supports_avx512bw(), "");
2364   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2365   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2366   emit_int8((unsigned char)0x99);
2367   emit_int8((unsigned char)(0xC0 | encode));
2368 }
2369 
2370 void Assembler::movb(Address dst, int imm8) {
2371   InstructionMark im(this);
2372    prefix(dst);
2373   emit_int8((unsigned char)0xC6);
2374   emit_operand(rax, dst, 1);
2375   emit_int8(imm8);
2376 }
2377 
2378 
2379 void Assembler::movb(Address dst, Register src) {
2380   assert(src->has_byte_register(), "must have byte register");
2381   InstructionMark im(this);
2382   prefix(dst, src, true);
2383   emit_int8((unsigned char)0x88);
2384   emit_operand(src, dst);
2385 }
2386 
2387 void Assembler::movdl(XMMRegister dst, Register src) {
2388   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2389   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2390   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2391   emit_int8(0x6E);
2392   emit_int8((unsigned char)(0xC0 | encode));
2393 }
2394 
2395 void Assembler::movdl(Register dst, XMMRegister src) {
2396   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2397   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2398   // swap src/dst to get correct prefix
2399   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2400   emit_int8(0x7E);
2401   emit_int8((unsigned char)(0xC0 | encode));
2402 }
2403 
2404 void Assembler::movdl(XMMRegister dst, Address src) {
2405   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2406   InstructionMark im(this);
2407   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2408   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2409   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2410   emit_int8(0x6E);
2411   emit_operand(dst, src);
2412 }
2413 
2414 void Assembler::movdl(Address dst, XMMRegister src) {
2415   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2416   InstructionMark im(this);
2417   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2418   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2419   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2420   emit_int8(0x7E);
2421   emit_operand(src, dst);
2422 }
2423 
2424 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2425   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2426   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2427   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2428   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2429   emit_int8(0x6F);
2430   emit_int8((unsigned char)(0xC0 | encode));
2431 }
2432 
2433 void Assembler::movdqa(XMMRegister dst, Address src) {
2434   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2435   InstructionMark im(this);
2436   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2437   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2438   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2439   emit_int8(0x6F);
2440   emit_operand(dst, src);
2441 }
2442 
2443 void Assembler::movdqu(XMMRegister dst, Address src) {
2444   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2445   InstructionMark im(this);
2446   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2447   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2448   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2449   emit_int8(0x6F);
2450   emit_operand(dst, src);
2451 }
2452 
2453 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2454   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2455   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2456   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2457   emit_int8(0x6F);
2458   emit_int8((unsigned char)(0xC0 | encode));
2459 }
2460 
2461 void Assembler::movdqu(Address dst, XMMRegister src) {
2462   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2463   InstructionMark im(this);
2464   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2465   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2466   attributes.reset_is_clear_context();
2467   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2468   emit_int8(0x7F);
2469   emit_operand(src, dst);
2470 }
2471 
2472 // Move Unaligned 256bit Vector
2473 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2474   assert(UseAVX > 0, "");
2475   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2476   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2477   emit_int8(0x6F);
2478   emit_int8((unsigned char)(0xC0 | encode));
2479 }
2480 
2481 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2482   assert(UseAVX > 0, "");
2483   InstructionMark im(this);
2484   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2485   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2486   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2487   emit_int8(0x6F);
2488   emit_operand(dst, src);
2489 }
2490 
2491 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2492   assert(UseAVX > 0, "");
2493   InstructionMark im(this);
2494   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2495   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2496   attributes.reset_is_clear_context();
2497   // swap src<->dst for encoding
2498   assert(src != xnoreg, "sanity");
2499   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2500   emit_int8(0x7F);
2501   emit_operand(src, dst);
2502 }
2503 
2504 // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
2505 void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
2506   assert(VM_Version::supports_evex(), "");
2507   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2508   attributes.set_is_evex_instruction();
2509   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2510   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2511   emit_int8(0x6F);
2512   emit_int8((unsigned char)(0xC0 | encode));
2513 }
2514 
2515 void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
2516   assert(VM_Version::supports_evex(), "");
2517   InstructionMark im(this);
2518   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2519   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2520   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2521   attributes.set_is_evex_instruction();
2522   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2523   emit_int8(0x6F);
2524   emit_operand(dst, src);
2525 }
2526 
2527 void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
2528   assert(VM_Version::supports_evex(), "");
2529   assert(src != xnoreg, "sanity");
2530   InstructionMark im(this);
2531   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2532   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2533   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2534   attributes.set_is_evex_instruction();
2535   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2536   emit_int8(0x7F);
2537   emit_operand(src, dst);
2538 }
2539 
2540 void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2541   assert(VM_Version::supports_avx512vlbw(), "");
2542   assert(is_vector_masking(), "");    // For stub code use only
2543   InstructionMark im(this);
2544   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2545   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2546   attributes.set_embedded_opmask_register_specifier(mask);
2547   attributes.set_is_evex_instruction();
2548   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2549   emit_int8(0x6F);
2550   emit_operand(dst, src);
2551 }
2552 
2553 void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
2554   assert(VM_Version::supports_evex(), "");
2555   InstructionMark im(this);
2556   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2557   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2558   attributes.set_is_evex_instruction();
2559   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2560   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2561   emit_int8(0x6F);
2562   emit_operand(dst, src);
2563 }
2564 
2565 void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2566   assert(is_vector_masking(), "");
2567   assert(VM_Version::supports_avx512vlbw(), "");
2568   InstructionMark im(this);
2569   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2570   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2571   attributes.set_embedded_opmask_register_specifier(mask);
2572   attributes.set_is_evex_instruction();
2573   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2574   emit_int8(0x6F);
2575   emit_operand(dst, src);
2576 }
2577 
2578 void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
2579   assert(VM_Version::supports_evex(), "");
2580   assert(src != xnoreg, "sanity");
2581   InstructionMark im(this);
2582   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2583   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2584   attributes.set_is_evex_instruction();
2585   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2586   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2587   emit_int8(0x7F);
2588   emit_operand(src, dst);
2589 }
2590 
2591 void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
2592   assert(VM_Version::supports_avx512vlbw(), "");
2593   assert(src != xnoreg, "sanity");
2594   InstructionMark im(this);
2595   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2596   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2597   attributes.reset_is_clear_context();
2598   attributes.set_embedded_opmask_register_specifier(mask);
2599   attributes.set_is_evex_instruction();
2600   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2601   emit_int8(0x7F);
2602   emit_operand(src, dst);
2603 }
2604 
2605 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2606   assert(VM_Version::supports_evex(), "");
2607   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2608   attributes.set_is_evex_instruction();
2609   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2610   emit_int8(0x6F);
2611   emit_int8((unsigned char)(0xC0 | encode));
2612 }
2613 
2614 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2615   assert(VM_Version::supports_evex(), "");
2616   InstructionMark im(this);
2617   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
2618   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2619   attributes.set_is_evex_instruction();
2620   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2621   emit_int8(0x6F);
2622   emit_operand(dst, src);
2623 }
2624 
2625 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2626   assert(VM_Version::supports_evex(), "");
2627   assert(src != xnoreg, "sanity");
2628   InstructionMark im(this);
2629   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2630   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2631   attributes.reset_is_clear_context();
2632   attributes.set_is_evex_instruction();
2633   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2634   emit_int8(0x7F);
2635   emit_operand(src, dst);
2636 }
2637 
2638 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2639   assert(VM_Version::supports_evex(), "");
2640   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2641   attributes.set_is_evex_instruction();
2642   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2643   emit_int8(0x6F);
2644   emit_int8((unsigned char)(0xC0 | encode));
2645 }
2646 
2647 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2648   assert(VM_Version::supports_evex(), "");
2649   InstructionMark im(this);
2650   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2651   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2652   attributes.set_is_evex_instruction();
2653   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2654   emit_int8(0x6F);
2655   emit_operand(dst, src);
2656 }
2657 
2658 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2659   assert(VM_Version::supports_evex(), "");
2660   assert(src != xnoreg, "sanity");
2661   InstructionMark im(this);
2662   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2663   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2664   attributes.reset_is_clear_context();
2665   attributes.set_is_evex_instruction();
2666   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2667   emit_int8(0x7F);
2668   emit_operand(src, dst);
2669 }
2670 
2671 // Uses zero extension on 64bit
2672 
2673 void Assembler::movl(Register dst, int32_t imm32) {
2674   int encode = prefix_and_encode(dst->encoding());
2675   emit_int8((unsigned char)(0xB8 | encode));
2676   emit_int32(imm32);
2677 }
2678 
2679 void Assembler::movl(Register dst, Register src) {
2680   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2681   emit_int8((unsigned char)0x8B);
2682   emit_int8((unsigned char)(0xC0 | encode));
2683 }
2684 
2685 void Assembler::movl(Register dst, Address src) {
2686   InstructionMark im(this);
2687   prefix(src, dst);
2688   emit_int8((unsigned char)0x8B);
2689   emit_operand(dst, src);
2690 }
2691 
2692 void Assembler::movl(Address dst, int32_t imm32) {
2693   InstructionMark im(this);
2694   prefix(dst);
2695   emit_int8((unsigned char)0xC7);
2696   emit_operand(rax, dst, 4);
2697   emit_int32(imm32);
2698 }
2699 
2700 void Assembler::movl(Address dst, Register src) {
2701   InstructionMark im(this);
2702   prefix(dst, src);
2703   emit_int8((unsigned char)0x89);
2704   emit_operand(src, dst);
2705 }
2706 
2707 // New cpus require to use movsd and movss to avoid partial register stall
2708 // when loading from memory. But for old Opteron use movlpd instead of movsd.
2709 // The selection is done in MacroAssembler::movdbl() and movflt().
2710 void Assembler::movlpd(XMMRegister dst, Address src) {
2711   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2712   InstructionMark im(this);
2713   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2714   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2715   attributes.set_rex_vex_w_reverted();
2716   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2717   emit_int8(0x12);
2718   emit_operand(dst, src);
2719 }
2720 
2721 void Assembler::movq( MMXRegister dst, Address src ) {
2722   assert( VM_Version::supports_mmx(), "" );
2723   emit_int8(0x0F);
2724   emit_int8(0x6F);
2725   emit_operand(dst, src);
2726 }
2727 
2728 void Assembler::movq( Address dst, MMXRegister src ) {
2729   assert( VM_Version::supports_mmx(), "" );
2730   emit_int8(0x0F);
2731   emit_int8(0x7F);
2732   // workaround gcc (3.2.1-7a) bug
2733   // In that version of gcc with only an emit_operand(MMX, Address)
2734   // gcc will tail jump and try and reverse the parameters completely
2735   // obliterating dst in the process. By having a version available
2736   // that doesn't need to swap the args at the tail jump the bug is
2737   // avoided.
2738   emit_operand(dst, src);
2739 }
2740 
2741 void Assembler::movq(XMMRegister dst, Address src) {
2742   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2743   InstructionMark im(this);
2744   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2745   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2746   attributes.set_rex_vex_w_reverted();
2747   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2748   emit_int8(0x7E);
2749   emit_operand(dst, src);
2750 }
2751 
2752 void Assembler::movq(Address dst, XMMRegister src) {
2753   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2754   InstructionMark im(this);
2755   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2756   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2757   attributes.set_rex_vex_w_reverted();
2758   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2759   emit_int8((unsigned char)0xD6);
2760   emit_operand(src, dst);
2761 }
2762 
2763 void Assembler::movsbl(Register dst, Address src) { // movsxb
2764   InstructionMark im(this);
2765   prefix(src, dst);
2766   emit_int8(0x0F);
2767   emit_int8((unsigned char)0xBE);
2768   emit_operand(dst, src);
2769 }
2770 
2771 void Assembler::movsbl(Register dst, Register src) { // movsxb
2772   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2773   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2774   emit_int8(0x0F);
2775   emit_int8((unsigned char)0xBE);
2776   emit_int8((unsigned char)(0xC0 | encode));
2777 }
2778 
2779 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2780   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2781   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2782   attributes.set_rex_vex_w_reverted();
2783   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2784   emit_int8(0x10);
2785   emit_int8((unsigned char)(0xC0 | encode));
2786 }
2787 
2788 void Assembler::movsd(XMMRegister dst, Address src) {
2789   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2790   InstructionMark im(this);
2791   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2792   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2793   attributes.set_rex_vex_w_reverted();
2794   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2795   emit_int8(0x10);
2796   emit_operand(dst, src);
2797 }
2798 
2799 void Assembler::movsd(Address dst, XMMRegister src) {
2800   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2801   InstructionMark im(this);
2802   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2803   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2804   attributes.reset_is_clear_context();
2805   attributes.set_rex_vex_w_reverted();
2806   simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2807   emit_int8(0x11);
2808   emit_operand(src, dst);
2809 }
2810 
2811 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2812   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2813   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2814   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2815   emit_int8(0x10);
2816   emit_int8((unsigned char)(0xC0 | encode));
2817 }
2818 
2819 void Assembler::movss(XMMRegister dst, Address src) {
2820   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2821   InstructionMark im(this);
2822   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2823   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2824   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2825   emit_int8(0x10);
2826   emit_operand(dst, src);
2827 }
2828 
2829 void Assembler::movss(Address dst, XMMRegister src) {
2830   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2831   InstructionMark im(this);
2832   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2833   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2834   attributes.reset_is_clear_context();
2835   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2836   emit_int8(0x11);
2837   emit_operand(src, dst);
2838 }
2839 
2840 void Assembler::movswl(Register dst, Address src) { // movsxw
2841   InstructionMark im(this);
2842   prefix(src, dst);
2843   emit_int8(0x0F);
2844   emit_int8((unsigned char)0xBF);
2845   emit_operand(dst, src);
2846 }
2847 
2848 void Assembler::movswl(Register dst, Register src) { // movsxw
2849   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2850   emit_int8(0x0F);
2851   emit_int8((unsigned char)0xBF);
2852   emit_int8((unsigned char)(0xC0 | encode));
2853 }
2854 
2855 void Assembler::movw(Address dst, int imm16) {
2856   InstructionMark im(this);
2857 
2858   emit_int8(0x66); // switch to 16-bit mode
2859   prefix(dst);
2860   emit_int8((unsigned char)0xC7);
2861   emit_operand(rax, dst, 2);
2862   emit_int16(imm16);
2863 }
2864 
2865 void Assembler::movw(Register dst, Address src) {
2866   InstructionMark im(this);
2867   emit_int8(0x66);
2868   prefix(src, dst);
2869   emit_int8((unsigned char)0x8B);
2870   emit_operand(dst, src);
2871 }
2872 
2873 void Assembler::movw(Address dst, Register src) {
2874   InstructionMark im(this);
2875   emit_int8(0x66);
2876   prefix(dst, src);
2877   emit_int8((unsigned char)0x89);
2878   emit_operand(src, dst);
2879 }
2880 
2881 void Assembler::movzbl(Register dst, Address src) { // movzxb
2882   InstructionMark im(this);
2883   prefix(src, dst);
2884   emit_int8(0x0F);
2885   emit_int8((unsigned char)0xB6);
2886   emit_operand(dst, src);
2887 }
2888 
2889 void Assembler::movzbl(Register dst, Register src) { // movzxb
2890   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2891   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2892   emit_int8(0x0F);
2893   emit_int8((unsigned char)0xB6);
2894   emit_int8(0xC0 | encode);
2895 }
2896 
2897 void Assembler::movzwl(Register dst, Address src) { // movzxw
2898   InstructionMark im(this);
2899   prefix(src, dst);
2900   emit_int8(0x0F);
2901   emit_int8((unsigned char)0xB7);
2902   emit_operand(dst, src);
2903 }
2904 
2905 void Assembler::movzwl(Register dst, Register src) { // movzxw
2906   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2907   emit_int8(0x0F);
2908   emit_int8((unsigned char)0xB7);
2909   emit_int8(0xC0 | encode);
2910 }
2911 
2912 void Assembler::mull(Address src) {
2913   InstructionMark im(this);
2914   prefix(src);
2915   emit_int8((unsigned char)0xF7);
2916   emit_operand(rsp, src);
2917 }
2918 
2919 void Assembler::mull(Register src) {
2920   int encode = prefix_and_encode(src->encoding());
2921   emit_int8((unsigned char)0xF7);
2922   emit_int8((unsigned char)(0xE0 | encode));
2923 }
2924 
2925 void Assembler::mulsd(XMMRegister dst, Address src) {
2926   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2927   InstructionMark im(this);
2928   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2929   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2930   attributes.set_rex_vex_w_reverted();
2931   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2932   emit_int8(0x59);
2933   emit_operand(dst, src);
2934 }
2935 
2936 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2937   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2938   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2939   attributes.set_rex_vex_w_reverted();
2940   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2941   emit_int8(0x59);
2942   emit_int8((unsigned char)(0xC0 | encode));
2943 }
2944 
2945 void Assembler::mulss(XMMRegister dst, Address src) {
2946   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2947   InstructionMark im(this);
2948   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2949   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2950   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2951   emit_int8(0x59);
2952   emit_operand(dst, src);
2953 }
2954 
2955 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2956   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2957   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2958   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2959   emit_int8(0x59);
2960   emit_int8((unsigned char)(0xC0 | encode));
2961 }
2962 
2963 void Assembler::negl(Register dst) {
2964   int encode = prefix_and_encode(dst->encoding());
2965   emit_int8((unsigned char)0xF7);
2966   emit_int8((unsigned char)(0xD8 | encode));
2967 }
2968 
2969 void Assembler::nop(int i) {
2970 #ifdef ASSERT
2971   assert(i > 0, " ");
2972   // The fancy nops aren't currently recognized by debuggers making it a
2973   // pain to disassemble code while debugging. If asserts are on clearly
2974   // speed is not an issue so simply use the single byte traditional nop
2975   // to do alignment.
2976 
2977   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2978   return;
2979 
2980 #endif // ASSERT
2981 
2982   if (UseAddressNop && VM_Version::is_intel()) {
2983     //
2984     // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
2985     //  1: 0x90
2986     //  2: 0x66 0x90
2987     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2988     //  4: 0x0F 0x1F 0x40 0x00
2989     //  5: 0x0F 0x1F 0x44 0x00 0x00
2990     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2991     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2992     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2993     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2994     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2995     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2996 
2997     // The rest coding is Intel specific - don't use consecutive address nops
2998 
2999     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3000     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3001     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3002     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3003 
3004     while(i >= 15) {
3005       // For Intel don't generate consecutive addess nops (mix with regular nops)
3006       i -= 15;
3007       emit_int8(0x66);   // size prefix
3008       emit_int8(0x66);   // size prefix
3009       emit_int8(0x66);   // size prefix
3010       addr_nop_8();
3011       emit_int8(0x66);   // size prefix
3012       emit_int8(0x66);   // size prefix
3013       emit_int8(0x66);   // size prefix
3014       emit_int8((unsigned char)0x90);
3015                          // nop
3016     }
3017     switch (i) {
3018       case 14:
3019         emit_int8(0x66); // size prefix
3020       case 13:
3021         emit_int8(0x66); // size prefix
3022       case 12:
3023         addr_nop_8();
3024         emit_int8(0x66); // size prefix
3025         emit_int8(0x66); // size prefix
3026         emit_int8(0x66); // size prefix
3027         emit_int8((unsigned char)0x90);
3028                          // nop
3029         break;
3030       case 11:
3031         emit_int8(0x66); // size prefix
3032       case 10:
3033         emit_int8(0x66); // size prefix
3034       case 9:
3035         emit_int8(0x66); // size prefix
3036       case 8:
3037         addr_nop_8();
3038         break;
3039       case 7:
3040         addr_nop_7();
3041         break;
3042       case 6:
3043         emit_int8(0x66); // size prefix
3044       case 5:
3045         addr_nop_5();
3046         break;
3047       case 4:
3048         addr_nop_4();
3049         break;
3050       case 3:
3051         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3052         emit_int8(0x66); // size prefix
3053       case 2:
3054         emit_int8(0x66); // size prefix
3055       case 1:
3056         emit_int8((unsigned char)0x90);
3057                          // nop
3058         break;
3059       default:
3060         assert(i == 0, " ");
3061     }
3062     return;
3063   }
3064   if (UseAddressNop && VM_Version::is_amd()) {
3065     //
3066     // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
3067     //  1: 0x90
3068     //  2: 0x66 0x90
3069     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3070     //  4: 0x0F 0x1F 0x40 0x00
3071     //  5: 0x0F 0x1F 0x44 0x00 0x00
3072     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3073     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3074     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3075     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3076     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3077     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3078 
3079     // The rest coding is AMD specific - use consecutive address nops
3080 
3081     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3082     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3083     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3084     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3085     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3086     //     Size prefixes (0x66) are added for larger sizes
3087 
3088     while(i >= 22) {
3089       i -= 11;
3090       emit_int8(0x66); // size prefix
3091       emit_int8(0x66); // size prefix
3092       emit_int8(0x66); // size prefix
3093       addr_nop_8();
3094     }
3095     // Generate first nop for size between 21-12
3096     switch (i) {
3097       case 21:
3098         i -= 1;
3099         emit_int8(0x66); // size prefix
3100       case 20:
3101       case 19:
3102         i -= 1;
3103         emit_int8(0x66); // size prefix
3104       case 18:
3105       case 17:
3106         i -= 1;
3107         emit_int8(0x66); // size prefix
3108       case 16:
3109       case 15:
3110         i -= 8;
3111         addr_nop_8();
3112         break;
3113       case 14:
3114       case 13:
3115         i -= 7;
3116         addr_nop_7();
3117         break;
3118       case 12:
3119         i -= 6;
3120         emit_int8(0x66); // size prefix
3121         addr_nop_5();
3122         break;
3123       default:
3124         assert(i < 12, " ");
3125     }
3126 
3127     // Generate second nop for size between 11-1
3128     switch (i) {
3129       case 11:
3130         emit_int8(0x66); // size prefix
3131       case 10:
3132         emit_int8(0x66); // size prefix
3133       case 9:
3134         emit_int8(0x66); // size prefix
3135       case 8:
3136         addr_nop_8();
3137         break;
3138       case 7:
3139         addr_nop_7();
3140         break;
3141       case 6:
3142         emit_int8(0x66); // size prefix
3143       case 5:
3144         addr_nop_5();
3145         break;
3146       case 4:
3147         addr_nop_4();
3148         break;
3149       case 3:
3150         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3151         emit_int8(0x66); // size prefix
3152       case 2:
3153         emit_int8(0x66); // size prefix
3154       case 1:
3155         emit_int8((unsigned char)0x90);
3156                          // nop
3157         break;
3158       default:
3159         assert(i == 0, " ");
3160     }
3161     return;
3162   }
3163 
3164   if (UseAddressNop && VM_Version::is_zx()) {
3165     //
3166     // Using multi-bytes nops "0x0F 0x1F [address]" for ZX
3167     //  1: 0x90
3168     //  2: 0x66 0x90
3169     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3170     //  4: 0x0F 0x1F 0x40 0x00
3171     //  5: 0x0F 0x1F 0x44 0x00 0x00
3172     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3173     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3174     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3175     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3176     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3177     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3178 
3179     // The rest coding is ZX specific - don't use consecutive address nops
3180 
3181     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3182     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3183     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3184     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3185 
3186     while (i >= 15) {
3187       // For ZX don't generate consecutive addess nops (mix with regular nops)
3188       i -= 15;
3189       emit_int8(0x66);   // size prefix
3190       emit_int8(0x66);   // size prefix
3191       emit_int8(0x66);   // size prefix
3192       addr_nop_8();
3193       emit_int8(0x66);   // size prefix
3194       emit_int8(0x66);   // size prefix
3195       emit_int8(0x66);   // size prefix
3196       emit_int8((unsigned char)0x90);
3197                          // nop
3198     }
3199     switch (i) {
3200       case 14:
3201         emit_int8(0x66); // size prefix
3202       case 13:
3203         emit_int8(0x66); // size prefix
3204       case 12:
3205         addr_nop_8();
3206         emit_int8(0x66); // size prefix
3207         emit_int8(0x66); // size prefix
3208         emit_int8(0x66); // size prefix
3209         emit_int8((unsigned char)0x90);
3210                          // nop
3211         break;
3212       case 11:
3213         emit_int8(0x66); // size prefix
3214       case 10:
3215         emit_int8(0x66); // size prefix
3216       case 9:
3217         emit_int8(0x66); // size prefix
3218       case 8:
3219         addr_nop_8();
3220         break;
3221       case 7:
3222         addr_nop_7();
3223         break;
3224       case 6:
3225         emit_int8(0x66); // size prefix
3226       case 5:
3227         addr_nop_5();
3228         break;
3229       case 4:
3230         addr_nop_4();
3231         break;
3232       case 3:
3233         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3234         emit_int8(0x66); // size prefix
3235       case 2:
3236         emit_int8(0x66); // size prefix
3237       case 1:
3238         emit_int8((unsigned char)0x90);
3239                          // nop
3240         break;
3241       default:
3242         assert(i == 0, " ");
3243     }
3244     return;
3245   }
3246 
3247   // Using nops with size prefixes "0x66 0x90".
3248   // From AMD Optimization Guide:
3249   //  1: 0x90
3250   //  2: 0x66 0x90
3251   //  3: 0x66 0x66 0x90
3252   //  4: 0x66 0x66 0x66 0x90
3253   //  5: 0x66 0x66 0x90 0x66 0x90
3254   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3255   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3256   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3257   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3258   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3259   //
3260   while(i > 12) {
3261     i -= 4;
3262     emit_int8(0x66); // size prefix
3263     emit_int8(0x66);
3264     emit_int8(0x66);
3265     emit_int8((unsigned char)0x90);
3266                      // nop
3267   }
3268   // 1 - 12 nops
3269   if(i > 8) {
3270     if(i > 9) {
3271       i -= 1;
3272       emit_int8(0x66);
3273     }
3274     i -= 3;
3275     emit_int8(0x66);
3276     emit_int8(0x66);
3277     emit_int8((unsigned char)0x90);
3278   }
3279   // 1 - 8 nops
3280   if(i > 4) {
3281     if(i > 6) {
3282       i -= 1;
3283       emit_int8(0x66);
3284     }
3285     i -= 3;
3286     emit_int8(0x66);
3287     emit_int8(0x66);
3288     emit_int8((unsigned char)0x90);
3289   }
3290   switch (i) {
3291     case 4:
3292       emit_int8(0x66);
3293     case 3:
3294       emit_int8(0x66);
3295     case 2:
3296       emit_int8(0x66);
3297     case 1:
3298       emit_int8((unsigned char)0x90);
3299       break;
3300     default:
3301       assert(i == 0, " ");
3302   }
3303 }
3304 
3305 void Assembler::notl(Register dst) {
3306   int encode = prefix_and_encode(dst->encoding());
3307   emit_int8((unsigned char)0xF7);
3308   emit_int8((unsigned char)(0xD0 | encode));
3309 }
3310 
3311 void Assembler::orl(Address dst, int32_t imm32) {
3312   InstructionMark im(this);
3313   prefix(dst);
3314   emit_arith_operand(0x81, rcx, dst, imm32);
3315 }
3316 
3317 void Assembler::orl(Register dst, int32_t imm32) {
3318   prefix(dst);
3319   emit_arith(0x81, 0xC8, dst, imm32);
3320 }
3321 
3322 void Assembler::orl(Register dst, Address src) {
3323   InstructionMark im(this);
3324   prefix(src, dst);
3325   emit_int8(0x0B);
3326   emit_operand(dst, src);
3327 }
3328 
3329 void Assembler::orl(Register dst, Register src) {
3330   (void) prefix_and_encode(dst->encoding(), src->encoding());
3331   emit_arith(0x0B, 0xC0, dst, src);
3332 }
3333 
3334 void Assembler::orl(Address dst, Register src) {
3335   InstructionMark im(this);
3336   prefix(dst, src);
3337   emit_int8(0x09);
3338   emit_operand(src, dst);
3339 }
3340 
3341 void Assembler::orb(Address dst, int imm8) {
3342   InstructionMark im(this);
3343   prefix(dst);
3344   emit_int8((unsigned char)0x80);
3345   emit_operand(rcx, dst, 1);
3346   emit_int8(imm8);
3347 }
3348 
3349 void Assembler::packuswb(XMMRegister dst, Address src) {
3350   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3351   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3352   InstructionMark im(this);
3353   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3354   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3355   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3356   emit_int8(0x67);
3357   emit_operand(dst, src);
3358 }
3359 
3360 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
3361   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3362   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3363   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3364   emit_int8(0x67);
3365   emit_int8((unsigned char)(0xC0 | encode));
3366 }
3367 
3368 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3369   assert(UseAVX > 0, "some form of AVX must be enabled");
3370   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3371   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3372   emit_int8(0x67);
3373   emit_int8((unsigned char)(0xC0 | encode));
3374 }
3375 
3376 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3377   assert(VM_Version::supports_avx2(), "");
3378   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3379   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3380   emit_int8(0x00);
3381   emit_int8(0xC0 | encode);
3382   emit_int8(imm8);
3383 }
3384 
3385 void Assembler::vperm2i128(XMMRegister dst,  XMMRegister nds, XMMRegister src, int imm8) {
3386   assert(VM_Version::supports_avx2(), "");
3387   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3388   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3389   emit_int8(0x46);
3390   emit_int8(0xC0 | encode);
3391   emit_int8(imm8);
3392 }
3393 
3394 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3395   assert(VM_Version::supports_avx(), "");
3396   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3397   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3398   emit_int8(0x06);
3399   emit_int8(0xC0 | encode);
3400   emit_int8(imm8);
3401 }
3402 
3403 
3404 void Assembler::pause() {
3405   emit_int8((unsigned char)0xF3);
3406   emit_int8((unsigned char)0x90);
3407 }
3408 
3409 void Assembler::ud2() {
3410   emit_int8(0x0F);
3411   emit_int8(0x0B);
3412 }
3413 
3414 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3415   assert(VM_Version::supports_sse4_2(), "");
3416   InstructionMark im(this);
3417   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3418   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3419   emit_int8(0x61);
3420   emit_operand(dst, src);
3421   emit_int8(imm8);
3422 }
3423 
3424 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3425   assert(VM_Version::supports_sse4_2(), "");
3426   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3427   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3428   emit_int8(0x61);
3429   emit_int8((unsigned char)(0xC0 | encode));
3430   emit_int8(imm8);
3431 }
3432 
3433 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3434 void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
3435   assert(VM_Version::supports_sse2(), "");
3436   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3437   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3438   emit_int8(0x74);
3439   emit_int8((unsigned char)(0xC0 | encode));
3440 }
3441 
3442 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3443 void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3444   assert(VM_Version::supports_avx(), "");
3445   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3446   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3447   emit_int8(0x74);
3448   emit_int8((unsigned char)(0xC0 | encode));
3449 }
3450 
3451 // In this context, kdst is written the mask used to process the equal components
3452 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3453   assert(VM_Version::supports_avx512bw(), "");
3454   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3455   attributes.set_is_evex_instruction();
3456   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3457   emit_int8(0x74);
3458   emit_int8((unsigned char)(0xC0 | encode));
3459 }
3460 
3461 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3462   assert(VM_Version::supports_avx512vlbw(), "");
3463   InstructionMark im(this);
3464   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3465   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3466   attributes.set_is_evex_instruction();
3467   int dst_enc = kdst->encoding();
3468   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3469   emit_int8(0x64);
3470   emit_operand(as_Register(dst_enc), src);
3471 }
3472 
3473 void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3474   assert(is_vector_masking(), "");
3475   assert(VM_Version::supports_avx512vlbw(), "");
3476   InstructionMark im(this);
3477   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3478   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3479   attributes.reset_is_clear_context();
3480   attributes.set_embedded_opmask_register_specifier(mask);
3481   attributes.set_is_evex_instruction();
3482   int dst_enc = kdst->encoding();
3483   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3484   emit_int8(0x64);
3485   emit_operand(as_Register(dst_enc), src);
3486 }
3487 
3488 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3489   assert(VM_Version::supports_avx512vlbw(), "");
3490   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3491   attributes.set_is_evex_instruction();
3492   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3493   emit_int8(0x3E);
3494   emit_int8((unsigned char)(0xC0 | encode));
3495   emit_int8(vcc);
3496 }
3497 
3498 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3499   assert(is_vector_masking(), "");
3500   assert(VM_Version::supports_avx512vlbw(), "");
3501   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3502   attributes.reset_is_clear_context();
3503   attributes.set_embedded_opmask_register_specifier(mask);
3504   attributes.set_is_evex_instruction();
3505   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3506   emit_int8(0x3E);
3507   emit_int8((unsigned char)(0xC0 | encode));
3508   emit_int8(vcc);
3509 }
3510 
3511 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3512   assert(VM_Version::supports_avx512vlbw(), "");
3513   InstructionMark im(this);
3514   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3515   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3516   attributes.set_is_evex_instruction();
3517   int dst_enc = kdst->encoding();
3518   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3519   emit_int8(0x3E);
3520   emit_operand(as_Register(dst_enc), src);
3521   emit_int8(vcc);
3522 }
3523 
3524 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3525   assert(VM_Version::supports_avx512bw(), "");
3526   InstructionMark im(this);
3527   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3528   attributes.set_is_evex_instruction();
3529   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3530   int dst_enc = kdst->encoding();
3531   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3532   emit_int8(0x74);
3533   emit_operand(as_Register(dst_enc), src);
3534 }
3535 
3536 void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3537   assert(VM_Version::supports_avx512vlbw(), "");
3538   assert(is_vector_masking(), "");    // For stub code use only
3539   InstructionMark im(this);
3540   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false);
3541   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3542   attributes.reset_is_clear_context();
3543   attributes.set_embedded_opmask_register_specifier(mask);
3544   attributes.set_is_evex_instruction();
3545   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3546   emit_int8(0x74);
3547   emit_operand(as_Register(kdst->encoding()), src);
3548 }
3549 
3550 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3551 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3552   assert(VM_Version::supports_sse2(), "");
3553   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3554   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3555   emit_int8(0x75);
3556   emit_int8((unsigned char)(0xC0 | encode));
3557 }
3558 
3559 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3560 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3561   assert(VM_Version::supports_avx(), "");
3562   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3563   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3564   emit_int8(0x75);
3565   emit_int8((unsigned char)(0xC0 | encode));
3566 }
3567 
3568 // In this context, kdst is written the mask used to process the equal components
3569 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3570   assert(VM_Version::supports_avx512bw(), "");
3571   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3572   attributes.set_is_evex_instruction();
3573   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3574   emit_int8(0x75);
3575   emit_int8((unsigned char)(0xC0 | encode));
3576 }
3577 
3578 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3579   assert(VM_Version::supports_avx512bw(), "");
3580   InstructionMark im(this);
3581   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3582   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3583   attributes.set_is_evex_instruction();
3584   int dst_enc = kdst->encoding();
3585   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3586   emit_int8(0x75);
3587   emit_operand(as_Register(dst_enc), src);
3588 }
3589 
3590 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3591 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3592   assert(VM_Version::supports_sse2(), "");
3593   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3594   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3595   emit_int8(0x76);
3596   emit_int8((unsigned char)(0xC0 | encode));
3597 }
3598 
3599 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3600 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3601   assert(VM_Version::supports_avx(), "");
3602   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3603   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3604   emit_int8(0x76);
3605   emit_int8((unsigned char)(0xC0 | encode));
3606 }
3607 
3608 // In this context, kdst is written the mask used to process the equal components
3609 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3610   assert(VM_Version::supports_evex(), "");
3611   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3612   attributes.set_is_evex_instruction();
3613   attributes.reset_is_clear_context();
3614   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3615   emit_int8(0x76);
3616   emit_int8((unsigned char)(0xC0 | encode));
3617 }
3618 
3619 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3620   assert(VM_Version::supports_evex(), "");
3621   InstructionMark im(this);
3622   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3623   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3624   attributes.reset_is_clear_context();
3625   attributes.set_is_evex_instruction();
3626   int dst_enc = kdst->encoding();
3627   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3628   emit_int8(0x76);
3629   emit_operand(as_Register(dst_enc), src);
3630 }
3631 
3632 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3633 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3634   assert(VM_Version::supports_sse4_1(), "");
3635   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3636   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3637   emit_int8(0x29);
3638   emit_int8((unsigned char)(0xC0 | encode));
3639 }
3640 
3641 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3642 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3643   assert(VM_Version::supports_avx(), "");
3644   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3645   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3646   emit_int8(0x29);
3647   emit_int8((unsigned char)(0xC0 | encode));
3648 }
3649 
3650 // In this context, kdst is written the mask used to process the equal components
3651 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3652   assert(VM_Version::supports_evex(), "");
3653   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3654   attributes.reset_is_clear_context();
3655   attributes.set_is_evex_instruction();
3656   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3657   emit_int8(0x29);
3658   emit_int8((unsigned char)(0xC0 | encode));
3659 }
3660 
3661 // In this context, kdst is written the mask used to process the equal components
3662 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3663   assert(VM_Version::supports_evex(), "");
3664   InstructionMark im(this);
3665   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3666   attributes.reset_is_clear_context();
3667   attributes.set_is_evex_instruction();
3668   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
3669   int dst_enc = kdst->encoding();
3670   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3671   emit_int8(0x29);
3672   emit_operand(as_Register(dst_enc), src);
3673 }
3674 
3675 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3676   assert(VM_Version::supports_sse2(), "");
3677   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3678   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3679   emit_int8((unsigned char)0xD7);
3680   emit_int8((unsigned char)(0xC0 | encode));
3681 }
3682 
3683 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3684   assert(VM_Version::supports_avx2(), "");
3685   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3686   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3687   emit_int8((unsigned char)0xD7);
3688   emit_int8((unsigned char)(0xC0 | encode));
3689 }
3690 
3691 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3692   assert(VM_Version::supports_sse4_1(), "");
3693   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3694   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3695   emit_int8(0x16);
3696   emit_int8((unsigned char)(0xC0 | encode));
3697   emit_int8(imm8);
3698 }
3699 
3700 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
3701   assert(VM_Version::supports_sse4_1(), "");
3702   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3703   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3704   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3705   emit_int8(0x16);
3706   emit_operand(src, dst);
3707   emit_int8(imm8);
3708 }
3709 
3710 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3711   assert(VM_Version::supports_sse4_1(), "");
3712   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3713   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3714   emit_int8(0x16);
3715   emit_int8((unsigned char)(0xC0 | encode));
3716   emit_int8(imm8);
3717 }
3718 
3719 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
3720   assert(VM_Version::supports_sse4_1(), "");
3721   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3722   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3723   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3724   emit_int8(0x16);
3725   emit_operand(src, dst);
3726   emit_int8(imm8);
3727 }
3728 
3729 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3730   assert(VM_Version::supports_sse2(), "");
3731   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3732   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3733   emit_int8((unsigned char)0xC5);
3734   emit_int8((unsigned char)(0xC0 | encode));
3735   emit_int8(imm8);
3736 }
3737 
3738 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
3739   assert(VM_Version::supports_sse4_1(), "");
3740   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3741   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3742   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3743   emit_int8((unsigned char)0x15);
3744   emit_operand(src, dst);
3745   emit_int8(imm8);
3746 }
3747 
3748 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
3749   assert(VM_Version::supports_sse4_1(), "");
3750   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3751   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3752   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3753   emit_int8(0x14);
3754   emit_operand(src, dst);
3755   emit_int8(imm8);
3756 }
3757 
3758 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3759   assert(VM_Version::supports_sse4_1(), "");
3760   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3761   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3762   emit_int8(0x22);
3763   emit_int8((unsigned char)(0xC0 | encode));
3764   emit_int8(imm8);
3765 }
3766 
3767 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
3768   assert(VM_Version::supports_sse4_1(), "");
3769   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3770   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3771   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3772   emit_int8(0x22);
3773   emit_operand(dst,src);
3774   emit_int8(imm8);
3775 }
3776 
3777 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3778   assert(VM_Version::supports_sse4_1(), "");
3779   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3780   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3781   emit_int8(0x22);
3782   emit_int8((unsigned char)(0xC0 | encode));
3783   emit_int8(imm8);
3784 }
3785 
3786 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
3787   assert(VM_Version::supports_sse4_1(), "");
3788   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3789   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3790   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3791   emit_int8(0x22);
3792   emit_operand(dst, src);
3793   emit_int8(imm8);
3794 }
3795 
3796 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3797   assert(VM_Version::supports_sse2(), "");
3798   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3799   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3800   emit_int8((unsigned char)0xC4);
3801   emit_int8((unsigned char)(0xC0 | encode));
3802   emit_int8(imm8);
3803 }
3804 
3805 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
3806   assert(VM_Version::supports_sse2(), "");
3807   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3808   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3809   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3810   emit_int8((unsigned char)0xC4);
3811   emit_operand(dst, src);
3812   emit_int8(imm8);
3813 }
3814 
3815 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
3816   assert(VM_Version::supports_sse4_1(), "");
3817   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3818   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3819   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3820   emit_int8(0x20);
3821   emit_operand(dst, src);
3822   emit_int8(imm8);
3823 }
3824 
3825 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3826   assert(VM_Version::supports_sse4_1(), "");
3827   InstructionMark im(this);
3828   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3829   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3830   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3831   emit_int8(0x30);
3832   emit_operand(dst, src);
3833 }
3834 
3835 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3836   assert(VM_Version::supports_sse4_1(), "");
3837   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3838   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3839   emit_int8(0x30);
3840   emit_int8((unsigned char)(0xC0 | encode));
3841 }
3842 
3843 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3844   assert(VM_Version::supports_avx(), "");
3845   InstructionMark im(this);
3846   assert(dst != xnoreg, "sanity");
3847   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3848   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3849   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3850   emit_int8(0x30);
3851   emit_operand(dst, src);
3852 }
3853 
3854 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
3855   assert(is_vector_masking(), "");
3856   assert(VM_Version::supports_avx512vlbw(), "");
3857   assert(dst != xnoreg, "sanity");
3858   InstructionMark im(this);
3859   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3860   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3861   attributes.set_embedded_opmask_register_specifier(mask);
3862   attributes.set_is_evex_instruction();
3863   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3864   emit_int8(0x30);
3865   emit_operand(dst, src);
3866 }
3867 
3868 void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
3869   assert(VM_Version::supports_avx512vlbw(), "");
3870   assert(src != xnoreg, "sanity");
3871   InstructionMark im(this);
3872   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3873   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3874   attributes.set_is_evex_instruction();
3875   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3876   emit_int8(0x30);
3877   emit_operand(src, dst);
3878 }
3879 
3880 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
3881   assert(is_vector_masking(), "");
3882   assert(VM_Version::supports_avx512vlbw(), "");
3883   assert(src != xnoreg, "sanity");
3884   InstructionMark im(this);
3885   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3886   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3887   attributes.reset_is_clear_context();
3888   attributes.set_embedded_opmask_register_specifier(mask);
3889   attributes.set_is_evex_instruction();
3890   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3891   emit_int8(0x30);
3892   emit_operand(src, dst);
3893 }
3894 
3895 // generic
3896 void Assembler::pop(Register dst) {
3897   int encode = prefix_and_encode(dst->encoding());
3898   emit_int8(0x58 | encode);
3899 }
3900 
3901 void Assembler::popcntl(Register dst, Address src) {
3902   assert(VM_Version::supports_popcnt(), "must support");
3903   InstructionMark im(this);
3904   emit_int8((unsigned char)0xF3);
3905   prefix(src, dst);
3906   emit_int8(0x0F);
3907   emit_int8((unsigned char)0xB8);
3908   emit_operand(dst, src);
3909 }
3910 
3911 void Assembler::popcntl(Register dst, Register src) {
3912   assert(VM_Version::supports_popcnt(), "must support");
3913   emit_int8((unsigned char)0xF3);
3914   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3915   emit_int8(0x0F);
3916   emit_int8((unsigned char)0xB8);
3917   emit_int8((unsigned char)(0xC0 | encode));
3918 }
3919 
3920 void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
3921   assert(VM_Version::supports_vpopcntdq(), "must support vpopcntdq feature");
3922   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3923   attributes.set_is_evex_instruction();
3924   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3925   emit_int8(0x55);
3926   emit_int8((unsigned char)(0xC0 | encode));
3927 }
3928 
3929 void Assembler::popf() {
3930   emit_int8((unsigned char)0x9D);
3931 }
3932 
3933 #ifndef _LP64 // no 32bit push/pop on amd64
3934 void Assembler::popl(Address dst) {
3935   // NOTE: this will adjust stack by 8byte on 64bits
3936   InstructionMark im(this);
3937   prefix(dst);
3938   emit_int8((unsigned char)0x8F);
3939   emit_operand(rax, dst);
3940 }
3941 #endif
3942 
3943 void Assembler::prefetch_prefix(Address src) {
3944   prefix(src);
3945   emit_int8(0x0F);
3946 }
3947 
3948 void Assembler::prefetchnta(Address src) {
3949   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3950   InstructionMark im(this);
3951   prefetch_prefix(src);
3952   emit_int8(0x18);
3953   emit_operand(rax, src); // 0, src
3954 }
3955 
3956 void Assembler::prefetchr(Address src) {
3957   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3958   InstructionMark im(this);
3959   prefetch_prefix(src);
3960   emit_int8(0x0D);
3961   emit_operand(rax, src); // 0, src
3962 }
3963 
3964 void Assembler::prefetcht0(Address src) {
3965   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3966   InstructionMark im(this);
3967   prefetch_prefix(src);
3968   emit_int8(0x18);
3969   emit_operand(rcx, src); // 1, src
3970 }
3971 
3972 void Assembler::prefetcht1(Address src) {
3973   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3974   InstructionMark im(this);
3975   prefetch_prefix(src);
3976   emit_int8(0x18);
3977   emit_operand(rdx, src); // 2, src
3978 }
3979 
3980 void Assembler::prefetcht2(Address src) {
3981   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3982   InstructionMark im(this);
3983   prefetch_prefix(src);
3984   emit_int8(0x18);
3985   emit_operand(rbx, src); // 3, src
3986 }
3987 
3988 void Assembler::prefetchw(Address src) {
3989   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3990   InstructionMark im(this);
3991   prefetch_prefix(src);
3992   emit_int8(0x0D);
3993   emit_operand(rcx, src); // 1, src
3994 }
3995 
3996 void Assembler::prefix(Prefix p) {
3997   emit_int8(p);
3998 }
3999 
4000 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
4001   assert(VM_Version::supports_ssse3(), "");
4002   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4003   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4004   emit_int8(0x00);
4005   emit_int8((unsigned char)(0xC0 | encode));
4006 }
4007 
4008 void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4009   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4010          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4011          0, "");
4012   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4013   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4014   emit_int8(0x00);
4015   emit_int8((unsigned char)(0xC0 | encode));
4016 }
4017 
4018 void Assembler::pshufb(XMMRegister dst, Address src) {
4019   assert(VM_Version::supports_ssse3(), "");
4020   InstructionMark im(this);
4021   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4022   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4023   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4024   emit_int8(0x00);
4025   emit_operand(dst, src);
4026 }
4027 
4028 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
4029   assert(isByte(mode), "invalid value");
4030   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4031   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
4032   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4033   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4034   emit_int8(0x70);
4035   emit_int8((unsigned char)(0xC0 | encode));
4036   emit_int8(mode & 0xFF);
4037 }
4038 
4039 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
4040   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4041          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4042          0, "");
4043   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4044   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4045   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4046   emit_int8(0x70);
4047   emit_int8((unsigned char)(0xC0 | encode));
4048   emit_int8(mode & 0xFF);
4049 }
4050 
4051 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
4052   assert(isByte(mode), "invalid value");
4053   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4054   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4055   InstructionMark im(this);
4056   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4057   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4058   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4059   emit_int8(0x70);
4060   emit_operand(dst, src);
4061   emit_int8(mode & 0xFF);
4062 }
4063 
4064 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
4065   assert(isByte(mode), "invalid value");
4066   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4067   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4068   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4069   emit_int8(0x70);
4070   emit_int8((unsigned char)(0xC0 | encode));
4071   emit_int8(mode & 0xFF);
4072 }
4073 
4074 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
4075   assert(isByte(mode), "invalid value");
4076   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4077   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4078   InstructionMark im(this);
4079   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4080   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4081   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4082   emit_int8(0x70);
4083   emit_operand(dst, src);
4084   emit_int8(mode & 0xFF);
4085 }
4086 void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4087   assert(VM_Version::supports_evex(), "requires EVEX support");
4088   assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, "");
4089   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4090   attributes.set_is_evex_instruction();
4091   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4092   emit_int8(0x43);
4093   emit_int8((unsigned char)(0xC0 | encode));
4094   emit_int8(imm8 & 0xFF);
4095 }
4096 
4097 void Assembler::psrldq(XMMRegister dst, int shift) {
4098   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
4099   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4100   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4101   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4102   emit_int8(0x73);
4103   emit_int8((unsigned char)(0xC0 | encode));
4104   emit_int8(shift);
4105 }
4106 
4107 void Assembler::pslldq(XMMRegister dst, int shift) {
4108   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
4109   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4110   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4111   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
4112   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4113   emit_int8(0x73);
4114   emit_int8((unsigned char)(0xC0 | encode));
4115   emit_int8(shift);
4116 }
4117 
4118 void Assembler::ptest(XMMRegister dst, Address src) {
4119   assert(VM_Version::supports_sse4_1(), "");
4120   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4121   InstructionMark im(this);
4122   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4123   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4124   emit_int8(0x17);
4125   emit_operand(dst, src);
4126 }
4127 
4128 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
4129   assert(VM_Version::supports_sse4_1(), "");
4130   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4131   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4132   emit_int8(0x17);
4133   emit_int8((unsigned char)(0xC0 | encode));
4134 }
4135 
4136 void Assembler::vptest(XMMRegister dst, Address src) {
4137   assert(VM_Version::supports_avx(), "");
4138   InstructionMark im(this);
4139   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4140   assert(dst != xnoreg, "sanity");
4141   // swap src<->dst for encoding
4142   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4143   emit_int8(0x17);
4144   emit_operand(dst, src);
4145 }
4146 
4147 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
4148   assert(VM_Version::supports_avx(), "");
4149   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4150   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4151   emit_int8(0x17);
4152   emit_int8((unsigned char)(0xC0 | encode));
4153 }
4154 
4155 void Assembler::punpcklbw(XMMRegister dst, Address src) {
4156   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4157   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4158   InstructionMark im(this);
4159   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4160   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4161   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4162   emit_int8(0x60);
4163   emit_operand(dst, src);
4164 }
4165 
4166 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
4167   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4168   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4169   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4170   emit_int8(0x60);
4171   emit_int8((unsigned char)(0xC0 | encode));
4172 }
4173 
4174 void Assembler::punpckldq(XMMRegister dst, Address src) {
4175   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4176   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4177   InstructionMark im(this);
4178   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4179   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4180   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4181   emit_int8(0x62);
4182   emit_operand(dst, src);
4183 }
4184 
4185 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
4186   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4187   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4188   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4189   emit_int8(0x62);
4190   emit_int8((unsigned char)(0xC0 | encode));
4191 }
4192 
4193 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
4194   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4195   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4196   attributes.set_rex_vex_w_reverted();
4197   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4198   emit_int8(0x6C);
4199   emit_int8((unsigned char)(0xC0 | encode));
4200 }
4201 
4202 void Assembler::push(int32_t imm32) {
4203   // in 64bits we push 64bits onto the stack but only
4204   // take a 32bit immediate
4205   emit_int8(0x68);
4206   emit_int32(imm32);
4207 }
4208 
4209 void Assembler::push(Register src) {
4210   int encode = prefix_and_encode(src->encoding());
4211 
4212   emit_int8(0x50 | encode);
4213 }
4214 
4215 void Assembler::pushf() {
4216   emit_int8((unsigned char)0x9C);
4217 }
4218 
4219 #ifndef _LP64 // no 32bit push/pop on amd64
4220 void Assembler::pushl(Address src) {
4221   // Note this will push 64bit on 64bit
4222   InstructionMark im(this);
4223   prefix(src);
4224   emit_int8((unsigned char)0xFF);
4225   emit_operand(rsi, src);
4226 }
4227 #endif
4228 
4229 void Assembler::rcll(Register dst, int imm8) {
4230   assert(isShiftCount(imm8), "illegal shift count");
4231   int encode = prefix_and_encode(dst->encoding());
4232   if (imm8 == 1) {
4233     emit_int8((unsigned char)0xD1);
4234     emit_int8((unsigned char)(0xD0 | encode));
4235   } else {
4236     emit_int8((unsigned char)0xC1);
4237     emit_int8((unsigned char)0xD0 | encode);
4238     emit_int8(imm8);
4239   }
4240 }
4241 
4242 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
4243   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4244   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4245   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4246   emit_int8(0x53);
4247   emit_int8((unsigned char)(0xC0 | encode));
4248 }
4249 
4250 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
4251   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4252   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4253   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4254   emit_int8(0x53);
4255   emit_int8((unsigned char)(0xC0 | encode));
4256 }
4257 
4258 void Assembler::rdtsc() {
4259   emit_int8((unsigned char)0x0F);
4260   emit_int8((unsigned char)0x31);
4261 }
4262 
4263 // copies data from [esi] to [edi] using rcx pointer sized words
4264 // generic
4265 void Assembler::rep_mov() {
4266   emit_int8((unsigned char)0xF3);
4267   // MOVSQ
4268   LP64_ONLY(prefix(REX_W));
4269   emit_int8((unsigned char)0xA5);
4270 }
4271 
4272 // sets rcx bytes with rax, value at [edi]
4273 void Assembler::rep_stosb() {
4274   emit_int8((unsigned char)0xF3); // REP
4275   LP64_ONLY(prefix(REX_W));
4276   emit_int8((unsigned char)0xAA); // STOSB
4277 }
4278 
4279 // sets rcx pointer sized words with rax, value at [edi]
4280 // generic
4281 void Assembler::rep_stos() {
4282   emit_int8((unsigned char)0xF3); // REP
4283   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
4284   emit_int8((unsigned char)0xAB);
4285 }
4286 
4287 // scans rcx pointer sized words at [edi] for occurance of rax,
4288 // generic
4289 void Assembler::repne_scan() { // repne_scan
4290   emit_int8((unsigned char)0xF2);
4291   // SCASQ
4292   LP64_ONLY(prefix(REX_W));
4293   emit_int8((unsigned char)0xAF);
4294 }
4295 
4296 #ifdef _LP64
4297 // scans rcx 4 byte words at [edi] for occurance of rax,
4298 // generic
4299 void Assembler::repne_scanl() { // repne_scan
4300   emit_int8((unsigned char)0xF2);
4301   // SCASL
4302   emit_int8((unsigned char)0xAF);
4303 }
4304 #endif
4305 
4306 void Assembler::ret(int imm16) {
4307   if (imm16 == 0) {
4308     emit_int8((unsigned char)0xC3);
4309   } else {
4310     emit_int8((unsigned char)0xC2);
4311     emit_int16(imm16);
4312   }
4313 }
4314 
4315 void Assembler::sahf() {
4316 #ifdef _LP64
4317   // Not supported in 64bit mode
4318   ShouldNotReachHere();
4319 #endif
4320   emit_int8((unsigned char)0x9E);
4321 }
4322 
4323 void Assembler::sarl(Register dst, int imm8) {
4324   int encode = prefix_and_encode(dst->encoding());
4325   assert(isShiftCount(imm8), "illegal shift count");
4326   if (imm8 == 1) {
4327     emit_int8((unsigned char)0xD1);
4328     emit_int8((unsigned char)(0xF8 | encode));
4329   } else {
4330     emit_int8((unsigned char)0xC1);
4331     emit_int8((unsigned char)(0xF8 | encode));
4332     emit_int8(imm8);
4333   }
4334 }
4335 
4336 void Assembler::sarl(Register dst) {
4337   int encode = prefix_and_encode(dst->encoding());
4338   emit_int8((unsigned char)0xD3);
4339   emit_int8((unsigned char)(0xF8 | encode));
4340 }
4341 
4342 void Assembler::sbbl(Address dst, int32_t imm32) {
4343   InstructionMark im(this);
4344   prefix(dst);
4345   emit_arith_operand(0x81, rbx, dst, imm32);
4346 }
4347 
4348 void Assembler::sbbl(Register dst, int32_t imm32) {
4349   prefix(dst);
4350   emit_arith(0x81, 0xD8, dst, imm32);
4351 }
4352 
4353 
4354 void Assembler::sbbl(Register dst, Address src) {
4355   InstructionMark im(this);
4356   prefix(src, dst);
4357   emit_int8(0x1B);
4358   emit_operand(dst, src);
4359 }
4360 
4361 void Assembler::sbbl(Register dst, Register src) {
4362   (void) prefix_and_encode(dst->encoding(), src->encoding());
4363   emit_arith(0x1B, 0xC0, dst, src);
4364 }
4365 
4366 void Assembler::setb(Condition cc, Register dst) {
4367   assert(0 <= cc && cc < 16, "illegal cc");
4368   int encode = prefix_and_encode(dst->encoding(), true);
4369   emit_int8(0x0F);
4370   emit_int8((unsigned char)0x90 | cc);
4371   emit_int8((unsigned char)(0xC0 | encode));
4372 }
4373 
4374 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
4375   assert(VM_Version::supports_ssse3(), "");
4376   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
4377   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4378   emit_int8((unsigned char)0x0F);
4379   emit_int8((unsigned char)(0xC0 | encode));
4380   emit_int8(imm8);
4381 }
4382 
4383 void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4384   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4385          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4386          0, "");
4387   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
4388   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4389   emit_int8((unsigned char)0x0F);
4390   emit_int8((unsigned char)(0xC0 | encode));
4391   emit_int8(imm8);
4392 }
4393 
4394 void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
4395   assert(VM_Version::supports_sse4_1(), "");
4396   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4397   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4398   emit_int8((unsigned char)0x0E);
4399   emit_int8((unsigned char)(0xC0 | encode));
4400   emit_int8(imm8);
4401 }
4402 
4403 void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
4404   assert(VM_Version::supports_sha(), "");
4405   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
4406   emit_int8((unsigned char)0xCC);
4407   emit_int8((unsigned char)(0xC0 | encode));
4408   emit_int8((unsigned char)imm8);
4409 }
4410 
4411 void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
4412   assert(VM_Version::supports_sha(), "");
4413   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4414   emit_int8((unsigned char)0xC8);
4415   emit_int8((unsigned char)(0xC0 | encode));
4416 }
4417 
4418 void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
4419   assert(VM_Version::supports_sha(), "");
4420   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4421   emit_int8((unsigned char)0xC9);
4422   emit_int8((unsigned char)(0xC0 | encode));
4423 }
4424 
4425 void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
4426   assert(VM_Version::supports_sha(), "");
4427   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4428   emit_int8((unsigned char)0xCA);
4429   emit_int8((unsigned char)(0xC0 | encode));
4430 }
4431 
4432 // xmm0 is implicit additional source to this instruction.
4433 void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
4434   assert(VM_Version::supports_sha(), "");
4435   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4436   emit_int8((unsigned char)0xCB);
4437   emit_int8((unsigned char)(0xC0 | encode));
4438 }
4439 
4440 void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
4441   assert(VM_Version::supports_sha(), "");
4442   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4443   emit_int8((unsigned char)0xCC);
4444   emit_int8((unsigned char)(0xC0 | encode));
4445 }
4446 
4447 void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
4448   assert(VM_Version::supports_sha(), "");
4449   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4450   emit_int8((unsigned char)0xCD);
4451   emit_int8((unsigned char)(0xC0 | encode));
4452 }
4453 
4454 
4455 void Assembler::shll(Register dst, int imm8) {
4456   assert(isShiftCount(imm8), "illegal shift count");
4457   int encode = prefix_and_encode(dst->encoding());
4458   if (imm8 == 1 ) {
4459     emit_int8((unsigned char)0xD1);
4460     emit_int8((unsigned char)(0xE0 | encode));
4461   } else {
4462     emit_int8((unsigned char)0xC1);
4463     emit_int8((unsigned char)(0xE0 | encode));
4464     emit_int8(imm8);
4465   }
4466 }
4467 
4468 void Assembler::shll(Register dst) {
4469   int encode = prefix_and_encode(dst->encoding());
4470   emit_int8((unsigned char)0xD3);
4471   emit_int8((unsigned char)(0xE0 | encode));
4472 }
4473 
4474 void Assembler::shrl(Register dst, int imm8) {
4475   assert(isShiftCount(imm8), "illegal shift count");
4476   int encode = prefix_and_encode(dst->encoding());
4477   emit_int8((unsigned char)0xC1);
4478   emit_int8((unsigned char)(0xE8 | encode));
4479   emit_int8(imm8);
4480 }
4481 
4482 void Assembler::shrl(Register dst) {
4483   int encode = prefix_and_encode(dst->encoding());
4484   emit_int8((unsigned char)0xD3);
4485   emit_int8((unsigned char)(0xE8 | encode));
4486 }
4487 
4488 // copies a single word from [esi] to [edi]
4489 void Assembler::smovl() {
4490   emit_int8((unsigned char)0xA5);
4491 }
4492 
4493 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
4494   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4495   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4496   attributes.set_rex_vex_w_reverted();
4497   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4498   emit_int8(0x51);
4499   emit_int8((unsigned char)(0xC0 | encode));
4500 }
4501 
4502 void Assembler::sqrtsd(XMMRegister dst, Address src) {
4503   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4504   InstructionMark im(this);
4505   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4506   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4507   attributes.set_rex_vex_w_reverted();
4508   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4509   emit_int8(0x51);
4510   emit_operand(dst, src);
4511 }
4512 
4513 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
4514   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4515   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4516   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4517   emit_int8(0x51);
4518   emit_int8((unsigned char)(0xC0 | encode));
4519 }
4520 
4521 void Assembler::std() {
4522   emit_int8((unsigned char)0xFD);
4523 }
4524 
4525 void Assembler::sqrtss(XMMRegister dst, Address src) {
4526   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4527   InstructionMark im(this);
4528   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4529   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4530   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4531   emit_int8(0x51);
4532   emit_operand(dst, src);
4533 }
4534 
4535 void Assembler::stmxcsr( Address dst) {
4536   if (UseAVX > 0 ) {
4537     assert(VM_Version::supports_avx(), "");
4538     InstructionMark im(this);
4539     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4540     vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4541     emit_int8((unsigned char)0xAE);
4542     emit_operand(as_Register(3), dst);
4543   } else {
4544     NOT_LP64(assert(VM_Version::supports_sse(), ""));
4545     InstructionMark im(this);
4546     prefix(dst);
4547     emit_int8(0x0F);
4548     emit_int8((unsigned char)0xAE);
4549     emit_operand(as_Register(3), dst);
4550   }
4551 }
4552 
4553 void Assembler::subl(Address dst, int32_t imm32) {
4554   InstructionMark im(this);
4555   prefix(dst);
4556   emit_arith_operand(0x81, rbp, dst, imm32);
4557 }
4558 
4559 void Assembler::subl(Address dst, Register src) {
4560   InstructionMark im(this);
4561   prefix(dst, src);
4562   emit_int8(0x29);
4563   emit_operand(src, dst);
4564 }
4565 
4566 void Assembler::subl(Register dst, int32_t imm32) {
4567   prefix(dst);
4568   emit_arith(0x81, 0xE8, dst, imm32);
4569 }
4570 
4571 // Force generation of a 4 byte immediate value even if it fits into 8bit
4572 void Assembler::subl_imm32(Register dst, int32_t imm32) {
4573   prefix(dst);
4574   emit_arith_imm32(0x81, 0xE8, dst, imm32);
4575 }
4576 
4577 void Assembler::subl(Register dst, Address src) {
4578   InstructionMark im(this);
4579   prefix(src, dst);
4580   emit_int8(0x2B);
4581   emit_operand(dst, src);
4582 }
4583 
4584 void Assembler::subl(Register dst, Register src) {
4585   (void) prefix_and_encode(dst->encoding(), src->encoding());
4586   emit_arith(0x2B, 0xC0, dst, src);
4587 }
4588 
4589 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
4590   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4591   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4592   attributes.set_rex_vex_w_reverted();
4593   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4594   emit_int8(0x5C);
4595   emit_int8((unsigned char)(0xC0 | encode));
4596 }
4597 
4598 void Assembler::subsd(XMMRegister dst, Address src) {
4599   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4600   InstructionMark im(this);
4601   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4602   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4603   attributes.set_rex_vex_w_reverted();
4604   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4605   emit_int8(0x5C);
4606   emit_operand(dst, src);
4607 }
4608 
4609 void Assembler::subss(XMMRegister dst, XMMRegister src) {
4610   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4611   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ false);
4612   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4613   emit_int8(0x5C);
4614   emit_int8((unsigned char)(0xC0 | encode));
4615 }
4616 
4617 void Assembler::subss(XMMRegister dst, Address src) {
4618   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4619   InstructionMark im(this);
4620   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4621   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4622   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4623   emit_int8(0x5C);
4624   emit_operand(dst, src);
4625 }
4626 
4627 void Assembler::testb(Register dst, int imm8) {
4628   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
4629   (void) prefix_and_encode(dst->encoding(), true);
4630   emit_arith_b(0xF6, 0xC0, dst, imm8);
4631 }
4632 
4633 void Assembler::testb(Address dst, int imm8) {
4634   InstructionMark im(this);
4635   prefix(dst);
4636   emit_int8((unsigned char)0xF6);
4637   emit_operand(rax, dst, 1);
4638   emit_int8(imm8);
4639 }
4640 
4641 void Assembler::testl(Register dst, int32_t imm32) {
4642   // not using emit_arith because test
4643   // doesn't support sign-extension of
4644   // 8bit operands
4645   int encode = dst->encoding();
4646   if (encode == 0) {
4647     emit_int8((unsigned char)0xA9);
4648   } else {
4649     encode = prefix_and_encode(encode);
4650     emit_int8((unsigned char)0xF7);
4651     emit_int8((unsigned char)(0xC0 | encode));
4652   }
4653   emit_int32(imm32);
4654 }
4655 
4656 void Assembler::testl(Register dst, Register src) {
4657   (void) prefix_and_encode(dst->encoding(), src->encoding());
4658   emit_arith(0x85, 0xC0, dst, src);
4659 }
4660 
4661 void Assembler::testl(Register dst, Address src) {
4662   InstructionMark im(this);
4663   prefix(src, dst);
4664   emit_int8((unsigned char)0x85);
4665   emit_operand(dst, src);
4666 }
4667 
4668 void Assembler::tzcntl(Register dst, Register src) {
4669   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4670   emit_int8((unsigned char)0xF3);
4671   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4672   emit_int8(0x0F);
4673   emit_int8((unsigned char)0xBC);
4674   emit_int8((unsigned char)0xC0 | encode);
4675 }
4676 
4677 void Assembler::tzcntq(Register dst, Register src) {
4678   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4679   emit_int8((unsigned char)0xF3);
4680   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4681   emit_int8(0x0F);
4682   emit_int8((unsigned char)0xBC);
4683   emit_int8((unsigned char)(0xC0 | encode));
4684 }
4685 
4686 void Assembler::ucomisd(XMMRegister dst, Address src) {
4687   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4688   InstructionMark im(this);
4689   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4690   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4691   attributes.set_rex_vex_w_reverted();
4692   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4693   emit_int8(0x2E);
4694   emit_operand(dst, src);
4695 }
4696 
4697 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
4698   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4699   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4700   attributes.set_rex_vex_w_reverted();
4701   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4702   emit_int8(0x2E);
4703   emit_int8((unsigned char)(0xC0 | encode));
4704 }
4705 
4706 void Assembler::ucomiss(XMMRegister dst, Address src) {
4707   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4708   InstructionMark im(this);
4709   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4710   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4711   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4712   emit_int8(0x2E);
4713   emit_operand(dst, src);
4714 }
4715 
4716 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
4717   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4718   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4719   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4720   emit_int8(0x2E);
4721   emit_int8((unsigned char)(0xC0 | encode));
4722 }
4723 
4724 void Assembler::xabort(int8_t imm8) {
4725   emit_int8((unsigned char)0xC6);
4726   emit_int8((unsigned char)0xF8);
4727   emit_int8((unsigned char)(imm8 & 0xFF));
4728 }
4729 
4730 void Assembler::xaddb(Address dst, Register src) {
4731   InstructionMark im(this);
4732   prefix(dst, src, true);
4733   emit_int8(0x0F);
4734   emit_int8((unsigned char)0xC0);
4735   emit_operand(src, dst);
4736 }
4737 
4738 void Assembler::xaddw(Address dst, Register src) {
4739   InstructionMark im(this);
4740   emit_int8(0x66);
4741   prefix(dst, src);
4742   emit_int8(0x0F);
4743   emit_int8((unsigned char)0xC1);
4744   emit_operand(src, dst);
4745 }
4746 
4747 void Assembler::xaddl(Address dst, Register src) {
4748   InstructionMark im(this);
4749   prefix(dst, src);
4750   emit_int8(0x0F);
4751   emit_int8((unsigned char)0xC1);
4752   emit_operand(src, dst);
4753 }
4754 
4755 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
4756   InstructionMark im(this);
4757   relocate(rtype);
4758   if (abort.is_bound()) {
4759     address entry = target(abort);
4760     assert(entry != NULL, "abort entry NULL");
4761     intptr_t offset = entry - pc();
4762     emit_int8((unsigned char)0xC7);
4763     emit_int8((unsigned char)0xF8);
4764     emit_int32(offset - 6); // 2 opcode + 4 address
4765   } else {
4766     abort.add_patch_at(code(), locator());
4767     emit_int8((unsigned char)0xC7);
4768     emit_int8((unsigned char)0xF8);
4769     emit_int32(0);
4770   }
4771 }
4772 
4773 void Assembler::xchgb(Register dst, Address src) { // xchg
4774   InstructionMark im(this);
4775   prefix(src, dst, true);
4776   emit_int8((unsigned char)0x86);
4777   emit_operand(dst, src);
4778 }
4779 
4780 void Assembler::xchgw(Register dst, Address src) { // xchg
4781   InstructionMark im(this);
4782   emit_int8(0x66);
4783   prefix(src, dst);
4784   emit_int8((unsigned char)0x87);
4785   emit_operand(dst, src);
4786 }
4787 
4788 void Assembler::xchgl(Register dst, Address src) { // xchg
4789   InstructionMark im(this);
4790   prefix(src, dst);
4791   emit_int8((unsigned char)0x87);
4792   emit_operand(dst, src);
4793 }
4794 
4795 void Assembler::xchgl(Register dst, Register src) {
4796   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4797   emit_int8((unsigned char)0x87);
4798   emit_int8((unsigned char)(0xC0 | encode));
4799 }
4800 
4801 void Assembler::xend() {
4802   emit_int8((unsigned char)0x0F);
4803   emit_int8((unsigned char)0x01);
4804   emit_int8((unsigned char)0xD5);
4805 }
4806 
4807 void Assembler::xgetbv() {
4808   emit_int8(0x0F);
4809   emit_int8(0x01);
4810   emit_int8((unsigned char)0xD0);
4811 }
4812 
4813 void Assembler::xorl(Register dst, int32_t imm32) {
4814   prefix(dst);
4815   emit_arith(0x81, 0xF0, dst, imm32);
4816 }
4817 
4818 void Assembler::xorl(Register dst, Address src) {
4819   InstructionMark im(this);
4820   prefix(src, dst);
4821   emit_int8(0x33);
4822   emit_operand(dst, src);
4823 }
4824 
4825 void Assembler::xorl(Register dst, Register src) {
4826   (void) prefix_and_encode(dst->encoding(), src->encoding());
4827   emit_arith(0x33, 0xC0, dst, src);
4828 }
4829 
4830 void Assembler::xorb(Register dst, Address src) {
4831   InstructionMark im(this);
4832   prefix(src, dst);
4833   emit_int8(0x32);
4834   emit_operand(dst, src);
4835 }
4836 
4837 // AVX 3-operands scalar float-point arithmetic instructions
4838 
4839 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
4840   assert(VM_Version::supports_avx(), "");
4841   InstructionMark im(this);
4842   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4843   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4844   attributes.set_rex_vex_w_reverted();
4845   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4846   emit_int8(0x58);
4847   emit_operand(dst, src);
4848 }
4849 
4850 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4851   assert(VM_Version::supports_avx(), "");
4852   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4853   attributes.set_rex_vex_w_reverted();
4854   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4855   emit_int8(0x58);
4856   emit_int8((unsigned char)(0xC0 | encode));
4857 }
4858 
4859 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
4860   assert(VM_Version::supports_avx(), "");
4861   InstructionMark im(this);
4862   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4863   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4864   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4865   emit_int8(0x58);
4866   emit_operand(dst, src);
4867 }
4868 
4869 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4870   assert(VM_Version::supports_avx(), "");
4871   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4872   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4873   emit_int8(0x58);
4874   emit_int8((unsigned char)(0xC0 | encode));
4875 }
4876 
4877 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
4878   assert(VM_Version::supports_avx(), "");
4879   InstructionMark im(this);
4880   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4881   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4882   attributes.set_rex_vex_w_reverted();
4883   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4884   emit_int8(0x5E);
4885   emit_operand(dst, src);
4886 }
4887 
4888 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4889   assert(VM_Version::supports_avx(), "");
4890   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4891   attributes.set_rex_vex_w_reverted();
4892   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4893   emit_int8(0x5E);
4894   emit_int8((unsigned char)(0xC0 | encode));
4895 }
4896 
4897 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
4898   assert(VM_Version::supports_avx(), "");
4899   InstructionMark im(this);
4900   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4901   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4902   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4903   emit_int8(0x5E);
4904   emit_operand(dst, src);
4905 }
4906 
4907 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4908   assert(VM_Version::supports_avx(), "");
4909   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4910   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4911   emit_int8(0x5E);
4912   emit_int8((unsigned char)(0xC0 | encode));
4913 }
4914 
4915 void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4916   assert(VM_Version::supports_fma(), "");
4917   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4918   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4919   emit_int8((unsigned char)0xB9);
4920   emit_int8((unsigned char)(0xC0 | encode));
4921 }
4922 
4923 void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4924   assert(VM_Version::supports_fma(), "");
4925   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4926   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4927   emit_int8((unsigned char)0xB9);
4928   emit_int8((unsigned char)(0xC0 | encode));
4929 }
4930 
4931 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
4932   assert(VM_Version::supports_avx(), "");
4933   InstructionMark im(this);
4934   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4935   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4936   attributes.set_rex_vex_w_reverted();
4937   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4938   emit_int8(0x59);
4939   emit_operand(dst, src);
4940 }
4941 
4942 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4943   assert(VM_Version::supports_avx(), "");
4944   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4945   attributes.set_rex_vex_w_reverted();
4946   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4947   emit_int8(0x59);
4948   emit_int8((unsigned char)(0xC0 | encode));
4949 }
4950 
4951 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
4952   assert(VM_Version::supports_avx(), "");
4953   InstructionMark im(this);
4954   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4955   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4956   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4957   emit_int8(0x59);
4958   emit_operand(dst, src);
4959 }
4960 
4961 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4962   assert(VM_Version::supports_avx(), "");
4963   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4964   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4965   emit_int8(0x59);
4966   emit_int8((unsigned char)(0xC0 | encode));
4967 }
4968 
4969 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
4970   assert(VM_Version::supports_avx(), "");
4971   InstructionMark im(this);
4972   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4973   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4974   attributes.set_rex_vex_w_reverted();
4975   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4976   emit_int8(0x5C);
4977   emit_operand(dst, src);
4978 }
4979 
4980 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4981   assert(VM_Version::supports_avx(), "");
4982   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4983   attributes.set_rex_vex_w_reverted();
4984   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4985   emit_int8(0x5C);
4986   emit_int8((unsigned char)(0xC0 | encode));
4987 }
4988 
4989 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
4990   assert(VM_Version::supports_avx(), "");
4991   InstructionMark im(this);
4992   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4993   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4994   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4995   emit_int8(0x5C);
4996   emit_operand(dst, src);
4997 }
4998 
4999 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5000   assert(VM_Version::supports_avx(), "");
5001   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5002   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5003   emit_int8(0x5C);
5004   emit_int8((unsigned char)(0xC0 | encode));
5005 }
5006 
5007 //====================VECTOR ARITHMETIC=====================================
5008 
5009 // Float-point vector arithmetic
5010 
5011 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
5012   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5013   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5014   attributes.set_rex_vex_w_reverted();
5015   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5016   emit_int8(0x58);
5017   emit_int8((unsigned char)(0xC0 | encode));
5018 }
5019 
5020 void Assembler::addpd(XMMRegister dst, Address src) {
5021   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5022   InstructionMark im(this);
5023   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5024   attributes.set_rex_vex_w_reverted();
5025   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5026   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5027   emit_int8(0x58);
5028   emit_operand(dst, src);
5029 }
5030 
5031 
5032 void Assembler::addps(XMMRegister dst, XMMRegister src) {
5033   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5034   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5035   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5036   emit_int8(0x58);
5037   emit_int8((unsigned char)(0xC0 | encode));
5038 }
5039 
5040 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5041   assert(VM_Version::supports_avx(), "");
5042   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5043   attributes.set_rex_vex_w_reverted();
5044   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5045   emit_int8(0x58);
5046   emit_int8((unsigned char)(0xC0 | encode));
5047 }
5048 
5049 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5050   assert(VM_Version::supports_avx(), "");
5051   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5052   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5053   emit_int8(0x58);
5054   emit_int8((unsigned char)(0xC0 | encode));
5055 }
5056 
5057 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5058   assert(VM_Version::supports_avx(), "");
5059   InstructionMark im(this);
5060   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5061   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5062   attributes.set_rex_vex_w_reverted();
5063   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5064   emit_int8(0x58);
5065   emit_operand(dst, src);
5066 }
5067 
5068 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5069   assert(VM_Version::supports_avx(), "");
5070   InstructionMark im(this);
5071   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5072   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5073   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5074   emit_int8(0x58);
5075   emit_operand(dst, src);
5076 }
5077 
5078 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
5079   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5080   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5081   attributes.set_rex_vex_w_reverted();
5082   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5083   emit_int8(0x5C);
5084   emit_int8((unsigned char)(0xC0 | encode));
5085 }
5086 
5087 void Assembler::subps(XMMRegister dst, XMMRegister src) {
5088   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5089   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5090   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5091   emit_int8(0x5C);
5092   emit_int8((unsigned char)(0xC0 | encode));
5093 }
5094 
5095 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5096   assert(VM_Version::supports_avx(), "");
5097   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5098   attributes.set_rex_vex_w_reverted();
5099   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5100   emit_int8(0x5C);
5101   emit_int8((unsigned char)(0xC0 | encode));
5102 }
5103 
5104 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5105   assert(VM_Version::supports_avx(), "");
5106   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5107   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5108   emit_int8(0x5C);
5109   emit_int8((unsigned char)(0xC0 | encode));
5110 }
5111 
5112 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5113   assert(VM_Version::supports_avx(), "");
5114   InstructionMark im(this);
5115   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5116   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5117   attributes.set_rex_vex_w_reverted();
5118   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5119   emit_int8(0x5C);
5120   emit_operand(dst, src);
5121 }
5122 
5123 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5124   assert(VM_Version::supports_avx(), "");
5125   InstructionMark im(this);
5126   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5127   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5128   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5129   emit_int8(0x5C);
5130   emit_operand(dst, src);
5131 }
5132 
5133 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
5134   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5135   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5136   attributes.set_rex_vex_w_reverted();
5137   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5138   emit_int8(0x59);
5139   emit_int8((unsigned char)(0xC0 | encode));
5140 }
5141 
5142 void Assembler::mulpd(XMMRegister dst, Address src) {
5143   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5144   InstructionMark im(this);
5145   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5146   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5147   attributes.set_rex_vex_w_reverted();
5148   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5149   emit_int8(0x59);
5150   emit_operand(dst, src);
5151 }
5152 
5153 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
5154   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5155   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5156   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5157   emit_int8(0x59);
5158   emit_int8((unsigned char)(0xC0 | encode));
5159 }
5160 
5161 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5162   assert(VM_Version::supports_avx(), "");
5163   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5164   attributes.set_rex_vex_w_reverted();
5165   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5166   emit_int8(0x59);
5167   emit_int8((unsigned char)(0xC0 | encode));
5168 }
5169 
5170 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5171   assert(VM_Version::supports_avx(), "");
5172   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5173   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5174   emit_int8(0x59);
5175   emit_int8((unsigned char)(0xC0 | encode));
5176 }
5177 
5178 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5179   assert(VM_Version::supports_avx(), "");
5180   InstructionMark im(this);
5181   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5182   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5183   attributes.set_rex_vex_w_reverted();
5184   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5185   emit_int8(0x59);
5186   emit_operand(dst, src);
5187 }
5188 
5189 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5190   assert(VM_Version::supports_avx(), "");
5191   InstructionMark im(this);
5192   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5193   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5194   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5195   emit_int8(0x59);
5196   emit_operand(dst, src);
5197 }
5198 
5199 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5200   assert(VM_Version::supports_fma(), "");
5201   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5202   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5203   emit_int8((unsigned char)0xB8);
5204   emit_int8((unsigned char)(0xC0 | encode));
5205 }
5206 
5207 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5208   assert(VM_Version::supports_fma(), "");
5209   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5210   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5211   emit_int8((unsigned char)0xB8);
5212   emit_int8((unsigned char)(0xC0 | encode));
5213 }
5214 
5215 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5216   assert(VM_Version::supports_fma(), "");
5217   InstructionMark im(this);
5218   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5219   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5220   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5221   emit_int8((unsigned char)0xB8);
5222   emit_operand(dst, src2);
5223 }
5224 
5225 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5226   assert(VM_Version::supports_fma(), "");
5227   InstructionMark im(this);
5228   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5229   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5230   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5231   emit_int8((unsigned char)0xB8);
5232   emit_operand(dst, src2);
5233 }
5234 
5235 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
5236   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5237   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5238   attributes.set_rex_vex_w_reverted();
5239   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5240   emit_int8(0x5E);
5241   emit_int8((unsigned char)(0xC0 | encode));
5242 }
5243 
5244 void Assembler::divps(XMMRegister dst, XMMRegister src) {
5245   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5246   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5247   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5248   emit_int8(0x5E);
5249   emit_int8((unsigned char)(0xC0 | encode));
5250 }
5251 
5252 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5253   assert(VM_Version::supports_avx(), "");
5254   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5255   attributes.set_rex_vex_w_reverted();
5256   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5257   emit_int8(0x5E);
5258   emit_int8((unsigned char)(0xC0 | encode));
5259 }
5260 
5261 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5262   assert(VM_Version::supports_avx(), "");
5263   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5264   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5265   emit_int8(0x5E);
5266   emit_int8((unsigned char)(0xC0 | encode));
5267 }
5268 
5269 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5270   assert(VM_Version::supports_avx(), "");
5271   InstructionMark im(this);
5272   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5273   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5274   attributes.set_rex_vex_w_reverted();
5275   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5276   emit_int8(0x5E);
5277   emit_operand(dst, src);
5278 }
5279 
5280 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5281   assert(VM_Version::supports_avx(), "");
5282   InstructionMark im(this);
5283   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5284   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5285   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5286   emit_int8(0x5E);
5287   emit_operand(dst, src);
5288 }
5289 
5290 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
5291   assert(VM_Version::supports_avx(), "");
5292   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5293   attributes.set_rex_vex_w_reverted();
5294   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5295   emit_int8(0x51);
5296   emit_int8((unsigned char)(0xC0 | encode));
5297 }
5298 
5299 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
5300   assert(VM_Version::supports_avx(), "");
5301   InstructionMark im(this);
5302   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5303   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5304   attributes.set_rex_vex_w_reverted();
5305   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5306   emit_int8(0x51);
5307   emit_operand(dst, src);
5308 }
5309 
5310 void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
5311   assert(VM_Version::supports_avx(), "");
5312   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5313   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5314   emit_int8(0x51);
5315   emit_int8((unsigned char)(0xC0 | encode));
5316 }
5317 
5318 void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
5319   assert(VM_Version::supports_avx(), "");
5320   InstructionMark im(this);
5321   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5322   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5323   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5324   emit_int8(0x51);
5325   emit_operand(dst, src);
5326 }
5327 
5328 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
5329   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5330   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5331   attributes.set_rex_vex_w_reverted();
5332   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5333   emit_int8(0x54);
5334   emit_int8((unsigned char)(0xC0 | encode));
5335 }
5336 
5337 void Assembler::andps(XMMRegister dst, XMMRegister src) {
5338   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5339   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5340   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5341   emit_int8(0x54);
5342   emit_int8((unsigned char)(0xC0 | encode));
5343 }
5344 
5345 void Assembler::andps(XMMRegister dst, Address src) {
5346   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5347   InstructionMark im(this);
5348   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5349   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5350   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5351   emit_int8(0x54);
5352   emit_operand(dst, src);
5353 }
5354 
5355 void Assembler::andpd(XMMRegister dst, Address src) {
5356   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5357   InstructionMark im(this);
5358   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5359   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5360   attributes.set_rex_vex_w_reverted();
5361   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5362   emit_int8(0x54);
5363   emit_operand(dst, src);
5364 }
5365 
5366 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5367   assert(VM_Version::supports_avx(), "");
5368   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5369   attributes.set_rex_vex_w_reverted();
5370   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5371   emit_int8(0x54);
5372   emit_int8((unsigned char)(0xC0 | encode));
5373 }
5374 
5375 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5376   assert(VM_Version::supports_avx(), "");
5377   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5378   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5379   emit_int8(0x54);
5380   emit_int8((unsigned char)(0xC0 | encode));
5381 }
5382 
5383 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5384   assert(VM_Version::supports_avx(), "");
5385   InstructionMark im(this);
5386   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5387   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5388   attributes.set_rex_vex_w_reverted();
5389   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5390   emit_int8(0x54);
5391   emit_operand(dst, src);
5392 }
5393 
5394 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5395   assert(VM_Version::supports_avx(), "");
5396   InstructionMark im(this);
5397   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5398   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5399   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5400   emit_int8(0x54);
5401   emit_operand(dst, src);
5402 }
5403 
5404 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
5405   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5406   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5407   attributes.set_rex_vex_w_reverted();
5408   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5409   emit_int8(0x15);
5410   emit_int8((unsigned char)(0xC0 | encode));
5411 }
5412 
5413 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
5414   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5415   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5416   attributes.set_rex_vex_w_reverted();
5417   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5418   emit_int8(0x14);
5419   emit_int8((unsigned char)(0xC0 | encode));
5420 }
5421 
5422 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
5423   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5424   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5425   attributes.set_rex_vex_w_reverted();
5426   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5427   emit_int8(0x57);
5428   emit_int8((unsigned char)(0xC0 | encode));
5429 }
5430 
5431 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
5432   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5433   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5434   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5435   emit_int8(0x57);
5436   emit_int8((unsigned char)(0xC0 | encode));
5437 }
5438 
5439 void Assembler::xorpd(XMMRegister dst, Address src) {
5440   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5441   InstructionMark im(this);
5442   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5443   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5444   attributes.set_rex_vex_w_reverted();
5445   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5446   emit_int8(0x57);
5447   emit_operand(dst, src);
5448 }
5449 
5450 void Assembler::xorps(XMMRegister dst, Address src) {
5451   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5452   InstructionMark im(this);
5453   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5454   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5455   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5456   emit_int8(0x57);
5457   emit_operand(dst, src);
5458 }
5459 
5460 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5461   assert(VM_Version::supports_avx(), "");
5462   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5463   attributes.set_rex_vex_w_reverted();
5464   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5465   emit_int8(0x57);
5466   emit_int8((unsigned char)(0xC0 | encode));
5467 }
5468 
5469 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5470   assert(VM_Version::supports_avx(), "");
5471   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5472   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5473   emit_int8(0x57);
5474   emit_int8((unsigned char)(0xC0 | encode));
5475 }
5476 
5477 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5478   assert(VM_Version::supports_avx(), "");
5479   InstructionMark im(this);
5480   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5481   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5482   attributes.set_rex_vex_w_reverted();
5483   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5484   emit_int8(0x57);
5485   emit_operand(dst, src);
5486 }
5487 
5488 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5489   assert(VM_Version::supports_avx(), "");
5490   InstructionMark im(this);
5491   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5492   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5493   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5494   emit_int8(0x57);
5495   emit_operand(dst, src);
5496 }
5497 
5498 // Integer vector arithmetic
5499 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5500   assert(VM_Version::supports_avx() && (vector_len == 0) ||
5501          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
5502   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5503   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5504   emit_int8(0x01);
5505   emit_int8((unsigned char)(0xC0 | encode));
5506 }
5507 
5508 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5509   assert(VM_Version::supports_avx() && (vector_len == 0) ||
5510          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
5511   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5512   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5513   emit_int8(0x02);
5514   emit_int8((unsigned char)(0xC0 | encode));
5515 }
5516 
5517 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
5518   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5519   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5520   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5521   emit_int8((unsigned char)0xFC);
5522   emit_int8((unsigned char)(0xC0 | encode));
5523 }
5524 
5525 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
5526   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5527   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5528   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5529   emit_int8((unsigned char)0xFD);
5530   emit_int8((unsigned char)(0xC0 | encode));
5531 }
5532 
5533 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
5534   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5535   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5536   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5537   emit_int8((unsigned char)0xFE);
5538   emit_int8((unsigned char)(0xC0 | encode));
5539 }
5540 
5541 void Assembler::paddd(XMMRegister dst, Address src) {
5542   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5543   InstructionMark im(this);
5544   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5545   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5546   emit_int8((unsigned char)0xFE);
5547   emit_operand(dst, src);
5548 }
5549 
5550 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
5551   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5552   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5553   attributes.set_rex_vex_w_reverted();
5554   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5555   emit_int8((unsigned char)0xD4);
5556   emit_int8((unsigned char)(0xC0 | encode));
5557 }
5558 
5559 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
5560   assert(VM_Version::supports_sse3(), "");
5561   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5562   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5563   emit_int8(0x01);
5564   emit_int8((unsigned char)(0xC0 | encode));
5565 }
5566 
5567 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
5568   assert(VM_Version::supports_sse3(), "");
5569   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5570   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5571   emit_int8(0x02);
5572   emit_int8((unsigned char)(0xC0 | encode));
5573 }
5574 
5575 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5576   assert(UseAVX > 0, "requires some form of AVX");
5577   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5578   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5579   emit_int8((unsigned char)0xFC);
5580   emit_int8((unsigned char)(0xC0 | encode));
5581 }
5582 
5583 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5584   assert(UseAVX > 0, "requires some form of AVX");
5585   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5586   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5587   emit_int8((unsigned char)0xFD);
5588   emit_int8((unsigned char)(0xC0 | encode));
5589 }
5590 
5591 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5592   assert(UseAVX > 0, "requires some form of AVX");
5593   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5594   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5595   emit_int8((unsigned char)0xFE);
5596   emit_int8((unsigned char)(0xC0 | encode));
5597 }
5598 
5599 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5600   assert(UseAVX > 0, "requires some form of AVX");
5601   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5602   attributes.set_rex_vex_w_reverted();
5603   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5604   emit_int8((unsigned char)0xD4);
5605   emit_int8((unsigned char)(0xC0 | encode));
5606 }
5607 
5608 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5609   assert(UseAVX > 0, "requires some form of AVX");
5610   InstructionMark im(this);
5611   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5612   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5613   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5614   emit_int8((unsigned char)0xFC);
5615   emit_operand(dst, src);
5616 }
5617 
5618 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5619   assert(UseAVX > 0, "requires some form of AVX");
5620   InstructionMark im(this);
5621   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5622   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5623   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5624   emit_int8((unsigned char)0xFD);
5625   emit_operand(dst, src);
5626 }
5627 
5628 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5629   assert(UseAVX > 0, "requires some form of AVX");
5630   InstructionMark im(this);
5631   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5632   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5633   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5634   emit_int8((unsigned char)0xFE);
5635   emit_operand(dst, src);
5636 }
5637 
5638 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5639   assert(UseAVX > 0, "requires some form of AVX");
5640   InstructionMark im(this);
5641   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5642   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5643   attributes.set_rex_vex_w_reverted();
5644   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5645   emit_int8((unsigned char)0xD4);
5646   emit_operand(dst, src);
5647 }
5648 
5649 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
5650   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5651   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5652   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5653   emit_int8((unsigned char)0xF8);
5654   emit_int8((unsigned char)(0xC0 | encode));
5655 }
5656 
5657 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
5658   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5659   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5660   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5661   emit_int8((unsigned char)0xF9);
5662   emit_int8((unsigned char)(0xC0 | encode));
5663 }
5664 
5665 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
5666   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5667   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5668   emit_int8((unsigned char)0xFA);
5669   emit_int8((unsigned char)(0xC0 | encode));
5670 }
5671 
5672 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
5673   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5674   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5675   attributes.set_rex_vex_w_reverted();
5676   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5677   emit_int8((unsigned char)0xFB);
5678   emit_int8((unsigned char)(0xC0 | encode));
5679 }
5680 
5681 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5682   assert(UseAVX > 0, "requires some form of AVX");
5683   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5684   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5685   emit_int8((unsigned char)0xF8);
5686   emit_int8((unsigned char)(0xC0 | encode));
5687 }
5688 
5689 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5690   assert(UseAVX > 0, "requires some form of AVX");
5691   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5692   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5693   emit_int8((unsigned char)0xF9);
5694   emit_int8((unsigned char)(0xC0 | encode));
5695 }
5696 
5697 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5698   assert(UseAVX > 0, "requires some form of AVX");
5699   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5700   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5701   emit_int8((unsigned char)0xFA);
5702   emit_int8((unsigned char)(0xC0 | encode));
5703 }
5704 
5705 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5706   assert(UseAVX > 0, "requires some form of AVX");
5707   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5708   attributes.set_rex_vex_w_reverted();
5709   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5710   emit_int8((unsigned char)0xFB);
5711   emit_int8((unsigned char)(0xC0 | encode));
5712 }
5713 
5714 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5715   assert(UseAVX > 0, "requires some form of AVX");
5716   InstructionMark im(this);
5717   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5718   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5719   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5720   emit_int8((unsigned char)0xF8);
5721   emit_operand(dst, src);
5722 }
5723 
5724 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5725   assert(UseAVX > 0, "requires some form of AVX");
5726   InstructionMark im(this);
5727   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5728   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5729   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5730   emit_int8((unsigned char)0xF9);
5731   emit_operand(dst, src);
5732 }
5733 
5734 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5735   assert(UseAVX > 0, "requires some form of AVX");
5736   InstructionMark im(this);
5737   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5738   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5739   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5740   emit_int8((unsigned char)0xFA);
5741   emit_operand(dst, src);
5742 }
5743 
5744 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5745   assert(UseAVX > 0, "requires some form of AVX");
5746   InstructionMark im(this);
5747   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5748   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5749   attributes.set_rex_vex_w_reverted();
5750   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5751   emit_int8((unsigned char)0xFB);
5752   emit_operand(dst, src);
5753 }
5754 
5755 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
5756   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5757   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5758   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5759   emit_int8((unsigned char)0xD5);
5760   emit_int8((unsigned char)(0xC0 | encode));
5761 }
5762 
5763 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
5764   assert(VM_Version::supports_sse4_1(), "");
5765   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5766   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5767   emit_int8(0x40);
5768   emit_int8((unsigned char)(0xC0 | encode));
5769 }
5770 
5771 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5772   assert(UseAVX > 0, "requires some form of AVX");
5773   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5774   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5775   emit_int8((unsigned char)0xD5);
5776   emit_int8((unsigned char)(0xC0 | encode));
5777 }
5778 
5779 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5780   assert(UseAVX > 0, "requires some form of AVX");
5781   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5782   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5783   emit_int8(0x40);
5784   emit_int8((unsigned char)(0xC0 | encode));
5785 }
5786 
5787 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5788   assert(UseAVX > 2, "requires some form of EVEX");
5789   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5790   attributes.set_is_evex_instruction();
5791   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5792   emit_int8(0x40);
5793   emit_int8((unsigned char)(0xC0 | encode));
5794 }
5795 
5796 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5797   assert(UseAVX > 0, "requires some form of AVX");
5798   InstructionMark im(this);
5799   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5800   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5801   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5802   emit_int8((unsigned char)0xD5);
5803   emit_operand(dst, src);
5804 }
5805 
5806 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5807   assert(UseAVX > 0, "requires some form of AVX");
5808   InstructionMark im(this);
5809   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5810   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5811   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5812   emit_int8(0x40);
5813   emit_operand(dst, src);
5814 }
5815 
5816 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5817   assert(UseAVX > 2, "requires some form of EVEX");
5818   InstructionMark im(this);
5819   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5820   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5821   attributes.set_is_evex_instruction();
5822   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5823   emit_int8(0x40);
5824   emit_operand(dst, src);
5825 }
5826 
5827 // Shift packed integers left by specified number of bits.
5828 void Assembler::psllw(XMMRegister dst, int shift) {
5829   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5830   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5831   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5832   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5833   emit_int8(0x71);
5834   emit_int8((unsigned char)(0xC0 | encode));
5835   emit_int8(shift & 0xFF);
5836 }
5837 
5838 void Assembler::pslld(XMMRegister dst, int shift) {
5839   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5840   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5841   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5842   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5843   emit_int8(0x72);
5844   emit_int8((unsigned char)(0xC0 | encode));
5845   emit_int8(shift & 0xFF);
5846 }
5847 
5848 void Assembler::psllq(XMMRegister dst, int shift) {
5849   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5850   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5851   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5852   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5853   emit_int8(0x73);
5854   emit_int8((unsigned char)(0xC0 | encode));
5855   emit_int8(shift & 0xFF);
5856 }
5857 
5858 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
5859   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5860   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5861   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5862   emit_int8((unsigned char)0xF1);
5863   emit_int8((unsigned char)(0xC0 | encode));
5864 }
5865 
5866 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
5867   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5868   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5869   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5870   emit_int8((unsigned char)0xF2);
5871   emit_int8((unsigned char)(0xC0 | encode));
5872 }
5873 
5874 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
5875   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5876   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5877   attributes.set_rex_vex_w_reverted();
5878   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5879   emit_int8((unsigned char)0xF3);
5880   emit_int8((unsigned char)(0xC0 | encode));
5881 }
5882 
5883 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5884   assert(UseAVX > 0, "requires some form of AVX");
5885   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5886   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5887   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5888   emit_int8(0x71);
5889   emit_int8((unsigned char)(0xC0 | encode));
5890   emit_int8(shift & 0xFF);
5891 }
5892 
5893 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5894   assert(UseAVX > 0, "requires some form of AVX");
5895   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5896   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5897   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5898   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5899   emit_int8(0x72);
5900   emit_int8((unsigned char)(0xC0 | encode));
5901   emit_int8(shift & 0xFF);
5902 }
5903 
5904 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5905   assert(UseAVX > 0, "requires some form of AVX");
5906   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5907   attributes.set_rex_vex_w_reverted();
5908   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5909   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5910   emit_int8(0x73);
5911   emit_int8((unsigned char)(0xC0 | encode));
5912   emit_int8(shift & 0xFF);
5913 }
5914 
5915 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5916   assert(UseAVX > 0, "requires some form of AVX");
5917   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5918   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5919   emit_int8((unsigned char)0xF1);
5920   emit_int8((unsigned char)(0xC0 | encode));
5921 }
5922 
5923 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5924   assert(UseAVX > 0, "requires some form of AVX");
5925   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5926   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5927   emit_int8((unsigned char)0xF2);
5928   emit_int8((unsigned char)(0xC0 | encode));
5929 }
5930 
5931 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5932   assert(UseAVX > 0, "requires some form of AVX");
5933   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5934   attributes.set_rex_vex_w_reverted();
5935   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5936   emit_int8((unsigned char)0xF3);
5937   emit_int8((unsigned char)(0xC0 | encode));
5938 }
5939 
5940 // Shift packed integers logically right by specified number of bits.
5941 void Assembler::psrlw(XMMRegister dst, int shift) {
5942   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5943   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5944   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
5945   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5946   emit_int8(0x71);
5947   emit_int8((unsigned char)(0xC0 | encode));
5948   emit_int8(shift & 0xFF);
5949 }
5950 
5951 void Assembler::psrld(XMMRegister dst, int shift) {
5952   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5953   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5954   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
5955   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5956   emit_int8(0x72);
5957   emit_int8((unsigned char)(0xC0 | encode));
5958   emit_int8(shift & 0xFF);
5959 }
5960 
5961 void Assembler::psrlq(XMMRegister dst, int shift) {
5962   // Do not confuse it with psrldq SSE2 instruction which
5963   // shifts 128 bit value in xmm register by number of bytes.
5964   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5965   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5966   attributes.set_rex_vex_w_reverted();
5967   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
5968   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5969   emit_int8(0x73);
5970   emit_int8((unsigned char)(0xC0 | encode));
5971   emit_int8(shift & 0xFF);
5972 }
5973 
5974 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
5975   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5976   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5977   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5978   emit_int8((unsigned char)0xD1);
5979   emit_int8((unsigned char)(0xC0 | encode));
5980 }
5981 
5982 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
5983   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5984   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5985   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5986   emit_int8((unsigned char)0xD2);
5987   emit_int8((unsigned char)(0xC0 | encode));
5988 }
5989 
5990 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
5991   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5992   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5993   attributes.set_rex_vex_w_reverted();
5994   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5995   emit_int8((unsigned char)0xD3);
5996   emit_int8((unsigned char)(0xC0 | encode));
5997 }
5998 
5999 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6000   assert(UseAVX > 0, "requires some form of AVX");
6001   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6002   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
6003   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6004   emit_int8(0x71);
6005   emit_int8((unsigned char)(0xC0 | encode));
6006   emit_int8(shift & 0xFF);
6007 }
6008 
6009 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6010   assert(UseAVX > 0, "requires some form of AVX");
6011   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6012   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
6013   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6014   emit_int8(0x72);
6015   emit_int8((unsigned char)(0xC0 | encode));
6016   emit_int8(shift & 0xFF);
6017 }
6018 
6019 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6020   assert(UseAVX > 0, "requires some form of AVX");
6021   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6022   attributes.set_rex_vex_w_reverted();
6023   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
6024   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6025   emit_int8(0x73);
6026   emit_int8((unsigned char)(0xC0 | encode));
6027   emit_int8(shift & 0xFF);
6028 }
6029 
6030 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6031   assert(UseAVX > 0, "requires some form of AVX");
6032   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6033   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6034   emit_int8((unsigned char)0xD1);
6035   emit_int8((unsigned char)(0xC0 | encode));
6036 }
6037 
6038 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6039   assert(UseAVX > 0, "requires some form of AVX");
6040   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6041   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6042   emit_int8((unsigned char)0xD2);
6043   emit_int8((unsigned char)(0xC0 | encode));
6044 }
6045 
6046 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6047   assert(UseAVX > 0, "requires some form of AVX");
6048   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6049   attributes.set_rex_vex_w_reverted();
6050   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6051   emit_int8((unsigned char)0xD3);
6052   emit_int8((unsigned char)(0xC0 | encode));
6053 }
6054 
6055 // Shift packed integers arithmetically right by specified number of bits.
6056 void Assembler::psraw(XMMRegister dst, int shift) {
6057   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6058   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6059   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6060   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6061   emit_int8(0x71);
6062   emit_int8((unsigned char)(0xC0 | encode));
6063   emit_int8(shift & 0xFF);
6064 }
6065 
6066 void Assembler::psrad(XMMRegister dst, int shift) {
6067   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6068   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6069   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6070   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6071   emit_int8(0x72);
6072   emit_int8((unsigned char)(0xC0 | encode));
6073   emit_int8(shift & 0xFF);
6074 }
6075 
6076 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
6077   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6078   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6079   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6080   emit_int8((unsigned char)0xE1);
6081   emit_int8((unsigned char)(0xC0 | encode));
6082 }
6083 
6084 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
6085   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6086   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6087   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6088   emit_int8((unsigned char)0xE2);
6089   emit_int8((unsigned char)(0xC0 | encode));
6090 }
6091 
6092 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6093   assert(UseAVX > 0, "requires some form of AVX");
6094   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6095   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6096   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6097   emit_int8(0x71);
6098   emit_int8((unsigned char)(0xC0 | encode));
6099   emit_int8(shift & 0xFF);
6100 }
6101 
6102 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6103   assert(UseAVX > 0, "requires some form of AVX");
6104   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6105   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6106   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6107   emit_int8(0x72);
6108   emit_int8((unsigned char)(0xC0 | encode));
6109   emit_int8(shift & 0xFF);
6110 }
6111 
6112 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6113   assert(UseAVX > 0, "requires some form of AVX");
6114   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6115   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6116   emit_int8((unsigned char)0xE1);
6117   emit_int8((unsigned char)(0xC0 | encode));
6118 }
6119 
6120 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6121   assert(UseAVX > 0, "requires some form of AVX");
6122   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6123   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6124   emit_int8((unsigned char)0xE2);
6125   emit_int8((unsigned char)(0xC0 | encode));
6126 }
6127 
6128 
6129 // logical operations packed integers
6130 void Assembler::pand(XMMRegister dst, XMMRegister src) {
6131   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6132   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6133   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6134   emit_int8((unsigned char)0xDB);
6135   emit_int8((unsigned char)(0xC0 | encode));
6136 }
6137 
6138 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6139   assert(UseAVX > 0, "requires some form of AVX");
6140   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6141   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6142   emit_int8((unsigned char)0xDB);
6143   emit_int8((unsigned char)(0xC0 | encode));
6144 }
6145 
6146 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6147   assert(UseAVX > 0, "requires some form of AVX");
6148   InstructionMark im(this);
6149   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6150   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6151   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6152   emit_int8((unsigned char)0xDB);
6153   emit_operand(dst, src);
6154 }
6155 
6156 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
6157   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6158   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6159   attributes.set_rex_vex_w_reverted();
6160   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6161   emit_int8((unsigned char)0xDF);
6162   emit_int8((unsigned char)(0xC0 | encode));
6163 }
6164 
6165 void Assembler::por(XMMRegister dst, XMMRegister src) {
6166   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6167   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6168   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6169   emit_int8((unsigned char)0xEB);
6170   emit_int8((unsigned char)(0xC0 | encode));
6171 }
6172 
6173 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6174   assert(UseAVX > 0, "requires some form of AVX");
6175   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6176   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6177   emit_int8((unsigned char)0xEB);
6178   emit_int8((unsigned char)(0xC0 | encode));
6179 }
6180 
6181 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6182   assert(UseAVX > 0, "requires some form of AVX");
6183   InstructionMark im(this);
6184   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6185   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6186   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6187   emit_int8((unsigned char)0xEB);
6188   emit_operand(dst, src);
6189 }
6190 
6191 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
6192   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6193   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6194   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6195   emit_int8((unsigned char)0xEF);
6196   emit_int8((unsigned char)(0xC0 | encode));
6197 }
6198 
6199 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6200   assert(UseAVX > 0, "requires some form of AVX");
6201   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6202   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6203   emit_int8((unsigned char)0xEF);
6204   emit_int8((unsigned char)(0xC0 | encode));
6205 }
6206 
6207 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6208   assert(UseAVX > 0, "requires some form of AVX");
6209   InstructionMark im(this);
6210   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6211   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6212   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6213   emit_int8((unsigned char)0xEF);
6214   emit_operand(dst, src);
6215 }
6216 
6217 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6218   assert(VM_Version::supports_evex(), "requires EVEX support");
6219   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6220   attributes.set_is_evex_instruction();
6221   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6222   emit_int8((unsigned char)0xEF);
6223   emit_int8((unsigned char)(0xC0 | encode));
6224 }
6225 
6226 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6227   assert(VM_Version::supports_evex(), "requires EVEX support");
6228   assert(dst != xnoreg, "sanity");
6229   InstructionMark im(this);
6230   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6231   attributes.set_is_evex_instruction();
6232   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6233   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6234   emit_int8((unsigned char)0xEF);
6235   emit_operand(dst, src);
6236 }
6237 
6238 
6239 // vinserti forms
6240 
6241 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6242   assert(VM_Version::supports_avx2(), "");
6243   assert(imm8 <= 0x01, "imm8: %u", imm8);
6244   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6245   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6246   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6247   emit_int8(0x38);
6248   emit_int8((unsigned char)(0xC0 | encode));
6249   // 0x00 - insert into lower 128 bits
6250   // 0x01 - insert into upper 128 bits
6251   emit_int8(imm8 & 0x01);
6252 }
6253 
6254 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6255   assert(VM_Version::supports_avx2(), "");
6256   assert(dst != xnoreg, "sanity");
6257   assert(imm8 <= 0x01, "imm8: %u", imm8);
6258   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6259   InstructionMark im(this);
6260   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6261   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6262   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6263   emit_int8(0x38);
6264   emit_operand(dst, src);
6265   // 0x00 - insert into lower 128 bits
6266   // 0x01 - insert into upper 128 bits
6267   emit_int8(imm8 & 0x01);
6268 }
6269 
6270 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6271   assert(VM_Version::supports_evex(), "");
6272   assert(imm8 <= 0x03, "imm8: %u", imm8);
6273   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6274   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6275   emit_int8(0x38);
6276   emit_int8((unsigned char)(0xC0 | encode));
6277   // 0x00 - insert into q0 128 bits (0..127)
6278   // 0x01 - insert into q1 128 bits (128..255)
6279   // 0x02 - insert into q2 128 bits (256..383)
6280   // 0x03 - insert into q3 128 bits (384..511)
6281   emit_int8(imm8 & 0x03);
6282 }
6283 
6284 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6285   assert(VM_Version::supports_avx(), "");
6286   assert(dst != xnoreg, "sanity");
6287   assert(imm8 <= 0x03, "imm8: %u", imm8);
6288   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6289   InstructionMark im(this);
6290   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6291   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6292   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6293   emit_int8(0x18);
6294   emit_operand(dst, src);
6295   // 0x00 - insert into q0 128 bits (0..127)
6296   // 0x01 - insert into q1 128 bits (128..255)
6297   // 0x02 - insert into q2 128 bits (256..383)
6298   // 0x03 - insert into q3 128 bits (384..511)
6299   emit_int8(imm8 & 0x03);
6300 }
6301 
6302 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6303   assert(VM_Version::supports_evex(), "");
6304   assert(imm8 <= 0x01, "imm8: %u", imm8);
6305   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6306   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6307   emit_int8(0x38);
6308   emit_int8((unsigned char)(0xC0 | encode));
6309   // 0x00 - insert into lower 256 bits
6310   // 0x01 - insert into upper 256 bits
6311   emit_int8(imm8 & 0x01);
6312 }
6313 
6314 
6315 // vinsertf forms
6316 
6317 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6318   assert(VM_Version::supports_avx(), "");
6319   assert(imm8 <= 0x01, "imm8: %u", imm8);
6320   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6321   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6322   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6323   emit_int8(0x18);
6324   emit_int8((unsigned char)(0xC0 | encode));
6325   // 0x00 - insert into lower 128 bits
6326   // 0x01 - insert into upper 128 bits
6327   emit_int8(imm8 & 0x01);
6328 }
6329 
6330 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6331   assert(VM_Version::supports_avx(), "");
6332   assert(dst != xnoreg, "sanity");
6333   assert(imm8 <= 0x01, "imm8: %u", imm8);
6334   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6335   InstructionMark im(this);
6336   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6337   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6338   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6339   emit_int8(0x18);
6340   emit_operand(dst, src);
6341   // 0x00 - insert into lower 128 bits
6342   // 0x01 - insert into upper 128 bits
6343   emit_int8(imm8 & 0x01);
6344 }
6345 
6346 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6347   assert(VM_Version::supports_evex(), "");
6348   assert(imm8 <= 0x03, "imm8: %u", imm8);
6349   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6350   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6351   emit_int8(0x18);
6352   emit_int8((unsigned char)(0xC0 | encode));
6353   // 0x00 - insert into q0 128 bits (0..127)
6354   // 0x01 - insert into q1 128 bits (128..255)
6355   // 0x02 - insert into q2 128 bits (256..383)
6356   // 0x03 - insert into q3 128 bits (384..511)
6357   emit_int8(imm8 & 0x03);
6358 }
6359 
6360 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6361   assert(VM_Version::supports_avx(), "");
6362   assert(dst != xnoreg, "sanity");
6363   assert(imm8 <= 0x03, "imm8: %u", imm8);
6364   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6365   InstructionMark im(this);
6366   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6367   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6368   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6369   emit_int8(0x18);
6370   emit_operand(dst, src);
6371   // 0x00 - insert into q0 128 bits (0..127)
6372   // 0x01 - insert into q1 128 bits (128..255)
6373   // 0x02 - insert into q2 128 bits (256..383)
6374   // 0x03 - insert into q3 128 bits (384..511)
6375   emit_int8(imm8 & 0x03);
6376 }
6377 
6378 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6379   assert(VM_Version::supports_evex(), "");
6380   assert(imm8 <= 0x01, "imm8: %u", imm8);
6381   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6382   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6383   emit_int8(0x1A);
6384   emit_int8((unsigned char)(0xC0 | encode));
6385   // 0x00 - insert into lower 256 bits
6386   // 0x01 - insert into upper 256 bits
6387   emit_int8(imm8 & 0x01);
6388 }
6389 
6390 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6391   assert(VM_Version::supports_evex(), "");
6392   assert(dst != xnoreg, "sanity");
6393   assert(imm8 <= 0x01, "imm8: %u", imm8);
6394   InstructionMark im(this);
6395   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6396   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
6397   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6398   emit_int8(0x1A);
6399   emit_operand(dst, src);
6400   // 0x00 - insert into lower 256 bits
6401   // 0x01 - insert into upper 256 bits
6402   emit_int8(imm8 & 0x01);
6403 }
6404 
6405 
6406 // vextracti forms
6407 
6408 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6409   assert(VM_Version::supports_avx(), "");
6410   assert(imm8 <= 0x01, "imm8: %u", imm8);
6411   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6412   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6413   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6414   emit_int8(0x39);
6415   emit_int8((unsigned char)(0xC0 | encode));
6416   // 0x00 - extract from lower 128 bits
6417   // 0x01 - extract from upper 128 bits
6418   emit_int8(imm8 & 0x01);
6419 }
6420 
6421 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
6422   assert(VM_Version::supports_avx2(), "");
6423   assert(src != xnoreg, "sanity");
6424   assert(imm8 <= 0x01, "imm8: %u", imm8);
6425   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6426   InstructionMark im(this);
6427   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6428   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6429   attributes.reset_is_clear_context();
6430   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6431   emit_int8(0x39);
6432   emit_operand(src, dst);
6433   // 0x00 - extract from lower 128 bits
6434   // 0x01 - extract from upper 128 bits
6435   emit_int8(imm8 & 0x01);
6436 }
6437 
6438 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6439   assert(VM_Version::supports_avx(), "");
6440   assert(imm8 <= 0x03, "imm8: %u", imm8);
6441   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6442   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6443   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6444   emit_int8(0x39);
6445   emit_int8((unsigned char)(0xC0 | encode));
6446   // 0x00 - extract from bits 127:0
6447   // 0x01 - extract from bits 255:128
6448   // 0x02 - extract from bits 383:256
6449   // 0x03 - extract from bits 511:384
6450   emit_int8(imm8 & 0x03);
6451 }
6452 
6453 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
6454   assert(VM_Version::supports_evex(), "");
6455   assert(src != xnoreg, "sanity");
6456   assert(imm8 <= 0x03, "imm8: %u", imm8);
6457   InstructionMark im(this);
6458   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6459   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6460   attributes.reset_is_clear_context();
6461   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6462   emit_int8(0x39);
6463   emit_operand(src, dst);
6464   // 0x00 - extract from bits 127:0
6465   // 0x01 - extract from bits 255:128
6466   // 0x02 - extract from bits 383:256
6467   // 0x03 - extract from bits 511:384
6468   emit_int8(imm8 & 0x03);
6469 }
6470 
6471 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6472   assert(VM_Version::supports_avx512dq(), "");
6473   assert(imm8 <= 0x03, "imm8: %u", imm8);
6474   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6475   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6476   emit_int8(0x39);
6477   emit_int8((unsigned char)(0xC0 | encode));
6478   // 0x00 - extract from bits 127:0
6479   // 0x01 - extract from bits 255:128
6480   // 0x02 - extract from bits 383:256
6481   // 0x03 - extract from bits 511:384
6482   emit_int8(imm8 & 0x03);
6483 }
6484 
6485 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6486   assert(VM_Version::supports_evex(), "");
6487   assert(imm8 <= 0x01, "imm8: %u", imm8);
6488   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6489   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6490   emit_int8(0x3B);
6491   emit_int8((unsigned char)(0xC0 | encode));
6492   // 0x00 - extract from lower 256 bits
6493   // 0x01 - extract from upper 256 bits
6494   emit_int8(imm8 & 0x01);
6495 }
6496 
6497 
6498 // vextractf forms
6499 
6500 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6501   assert(VM_Version::supports_avx(), "");
6502   assert(imm8 <= 0x01, "imm8: %u", imm8);
6503   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6504   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6505   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6506   emit_int8(0x19);
6507   emit_int8((unsigned char)(0xC0 | encode));
6508   // 0x00 - extract from lower 128 bits
6509   // 0x01 - extract from upper 128 bits
6510   emit_int8(imm8 & 0x01);
6511 }
6512 
6513 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
6514   assert(VM_Version::supports_avx(), "");
6515   assert(src != xnoreg, "sanity");
6516   assert(imm8 <= 0x01, "imm8: %u", imm8);
6517   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6518   InstructionMark im(this);
6519   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6520   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6521   attributes.reset_is_clear_context();
6522   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6523   emit_int8(0x19);
6524   emit_operand(src, dst);
6525   // 0x00 - extract from lower 128 bits
6526   // 0x01 - extract from upper 128 bits
6527   emit_int8(imm8 & 0x01);
6528 }
6529 
6530 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6531   assert(VM_Version::supports_avx(), "");
6532   assert(imm8 <= 0x03, "imm8: %u", imm8);
6533   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6534   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6535   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6536   emit_int8(0x19);
6537   emit_int8((unsigned char)(0xC0 | encode));
6538   // 0x00 - extract from bits 127:0
6539   // 0x01 - extract from bits 255:128
6540   // 0x02 - extract from bits 383:256
6541   // 0x03 - extract from bits 511:384
6542   emit_int8(imm8 & 0x03);
6543 }
6544 
6545 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
6546   assert(VM_Version::supports_evex(), "");
6547   assert(src != xnoreg, "sanity");
6548   assert(imm8 <= 0x03, "imm8: %u", imm8);
6549   InstructionMark im(this);
6550   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6551   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6552   attributes.reset_is_clear_context();
6553   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6554   emit_int8(0x19);
6555   emit_operand(src, dst);
6556   // 0x00 - extract from bits 127:0
6557   // 0x01 - extract from bits 255:128
6558   // 0x02 - extract from bits 383:256
6559   // 0x03 - extract from bits 511:384
6560   emit_int8(imm8 & 0x03);
6561 }
6562 
6563 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6564   assert(VM_Version::supports_avx512dq(), "");
6565   assert(imm8 <= 0x03, "imm8: %u", imm8);
6566   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6567   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6568   emit_int8(0x19);
6569   emit_int8((unsigned char)(0xC0 | encode));
6570   // 0x00 - extract from bits 127:0
6571   // 0x01 - extract from bits 255:128
6572   // 0x02 - extract from bits 383:256
6573   // 0x03 - extract from bits 511:384
6574   emit_int8(imm8 & 0x03);
6575 }
6576 
6577 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6578   assert(VM_Version::supports_evex(), "");
6579   assert(imm8 <= 0x01, "imm8: %u", imm8);
6580   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6581   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6582   emit_int8(0x1B);
6583   emit_int8((unsigned char)(0xC0 | encode));
6584   // 0x00 - extract from lower 256 bits
6585   // 0x01 - extract from upper 256 bits
6586   emit_int8(imm8 & 0x01);
6587 }
6588 
6589 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
6590   assert(VM_Version::supports_evex(), "");
6591   assert(src != xnoreg, "sanity");
6592   assert(imm8 <= 0x01, "imm8: %u", imm8);
6593   InstructionMark im(this);
6594   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6595   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
6596   attributes.reset_is_clear_context();
6597   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6598   emit_int8(0x1B);
6599   emit_operand(src, dst);
6600   // 0x00 - extract from lower 256 bits
6601   // 0x01 - extract from upper 256 bits
6602   emit_int8(imm8 & 0x01);
6603 }
6604 
6605 
6606 // legacy word/dword replicate
6607 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
6608   assert(VM_Version::supports_avx2(), "");
6609   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6610   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6611   emit_int8(0x79);
6612   emit_int8((unsigned char)(0xC0 | encode));
6613 }
6614 
6615 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
6616   assert(VM_Version::supports_avx2(), "");
6617   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6618   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6619   emit_int8(0x58);
6620   emit_int8((unsigned char)(0xC0 | encode));
6621 }
6622 
6623 
6624 // xmm/mem sourced byte/word/dword/qword replicate
6625 
6626 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6627 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
6628   assert(VM_Version::supports_evex(), "");
6629   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6630   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6631   emit_int8(0x78);
6632   emit_int8((unsigned char)(0xC0 | encode));
6633 }
6634 
6635 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
6636   assert(VM_Version::supports_evex(), "");
6637   assert(dst != xnoreg, "sanity");
6638   InstructionMark im(this);
6639   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6640   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
6641   // swap src<->dst for encoding
6642   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6643   emit_int8(0x78);
6644   emit_operand(dst, src);
6645 }
6646 
6647 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6648 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
6649   assert(VM_Version::supports_evex(), "");
6650   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6651   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6652   emit_int8(0x79);
6653   emit_int8((unsigned char)(0xC0 | encode));
6654 }
6655 
6656 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
6657   assert(VM_Version::supports_evex(), "");
6658   assert(dst != xnoreg, "sanity");
6659   InstructionMark im(this);
6660   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6661   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
6662   // swap src<->dst for encoding
6663   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6664   emit_int8(0x79);
6665   emit_operand(dst, src);
6666 }
6667 
6668 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6669 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
6670   assert(VM_Version::supports_evex(), "");
6671   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6672   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6673   emit_int8(0x58);
6674   emit_int8((unsigned char)(0xC0 | encode));
6675 }
6676 
6677 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
6678   assert(VM_Version::supports_evex(), "");
6679   assert(dst != xnoreg, "sanity");
6680   InstructionMark im(this);
6681   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6682   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6683   // swap src<->dst for encoding
6684   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6685   emit_int8(0x58);
6686   emit_operand(dst, src);
6687 }
6688 
6689 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6690 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
6691   assert(VM_Version::supports_evex(), "");
6692   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6693   attributes.set_rex_vex_w_reverted();
6694   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6695   emit_int8(0x59);
6696   emit_int8((unsigned char)(0xC0 | encode));
6697 }
6698 
6699 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
6700   assert(VM_Version::supports_evex(), "");
6701   assert(dst != xnoreg, "sanity");
6702   InstructionMark im(this);
6703   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6704   attributes.set_rex_vex_w_reverted();
6705   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6706   // swap src<->dst for encoding
6707   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6708   emit_int8(0x59);
6709   emit_operand(dst, src);
6710 }
6711 
6712 
6713 // scalar single/double precision replicate
6714 
6715 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
6716 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
6717   assert(VM_Version::supports_evex(), "");
6718   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6719   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6720   emit_int8(0x18);
6721   emit_int8((unsigned char)(0xC0 | encode));
6722 }
6723 
6724 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
6725   assert(VM_Version::supports_evex(), "");
6726   assert(dst != xnoreg, "sanity");
6727   InstructionMark im(this);
6728   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6729   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6730   // swap src<->dst for encoding
6731   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6732   emit_int8(0x18);
6733   emit_operand(dst, src);
6734 }
6735 
6736 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
6737 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
6738   assert(VM_Version::supports_evex(), "");
6739   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6740   attributes.set_rex_vex_w_reverted();
6741   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6742   emit_int8(0x19);
6743   emit_int8((unsigned char)(0xC0 | encode));
6744 }
6745 
6746 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
6747   assert(VM_Version::supports_evex(), "");
6748   assert(dst != xnoreg, "sanity");
6749   InstructionMark im(this);
6750   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6751   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6752   attributes.set_rex_vex_w_reverted();
6753   // swap src<->dst for encoding
6754   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6755   emit_int8(0x19);
6756   emit_operand(dst, src);
6757 }
6758 
6759 
6760 // gpr source broadcast forms
6761 
6762 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6763 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
6764   assert(VM_Version::supports_evex(), "");
6765   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6766   attributes.set_is_evex_instruction();
6767   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6768   emit_int8(0x7A);
6769   emit_int8((unsigned char)(0xC0 | encode));
6770 }
6771 
6772 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6773 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
6774   assert(VM_Version::supports_evex(), "");
6775   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6776   attributes.set_is_evex_instruction();
6777   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6778   emit_int8(0x7B);
6779   emit_int8((unsigned char)(0xC0 | encode));
6780 }
6781 
6782 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6783 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
6784   assert(VM_Version::supports_evex(), "");
6785   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6786   attributes.set_is_evex_instruction();
6787   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6788   emit_int8(0x7C);
6789   emit_int8((unsigned char)(0xC0 | encode));
6790 }
6791 
6792 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6793 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
6794   assert(VM_Version::supports_evex(), "");
6795   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6796   attributes.set_is_evex_instruction();
6797   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6798   emit_int8(0x7C);
6799   emit_int8((unsigned char)(0xC0 | encode));
6800 }
6801 
6802 
6803 // Carry-Less Multiplication Quadword
6804 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
6805   assert(VM_Version::supports_clmul(), "");
6806   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6807   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6808   emit_int8(0x44);
6809   emit_int8((unsigned char)(0xC0 | encode));
6810   emit_int8((unsigned char)mask);
6811 }
6812 
6813 // Carry-Less Multiplication Quadword
6814 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
6815   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
6816   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6817   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6818   emit_int8(0x44);
6819   emit_int8((unsigned char)(0xC0 | encode));
6820   emit_int8((unsigned char)mask);
6821 }
6822 
6823 void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len) {
6824   assert(VM_Version::supports_vpclmulqdq(), "Requires vector carryless multiplication support");
6825   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6826   attributes.set_is_evex_instruction();
6827   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6828   emit_int8(0x44);
6829   emit_int8((unsigned char)(0xC0 | encode));
6830   emit_int8((unsigned char)mask);
6831 }
6832 
6833 void Assembler::vzeroupper() {
6834   if (VM_Version::supports_vzeroupper()) {
6835     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
6836     (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6837     emit_int8(0x77);
6838   }
6839 }
6840 
6841 #ifndef _LP64
6842 // 32bit only pieces of the assembler
6843 
6844 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
6845   // NO PREFIX AS NEVER 64BIT
6846   InstructionMark im(this);
6847   emit_int8((unsigned char)0x81);
6848   emit_int8((unsigned char)(0xF8 | src1->encoding()));
6849   emit_data(imm32, rspec, 0);
6850 }
6851 
6852 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
6853   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
6854   InstructionMark im(this);
6855   emit_int8((unsigned char)0x81);
6856   emit_operand(rdi, src1);
6857   emit_data(imm32, rspec, 0);
6858 }
6859 
6860 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
6861 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
6862 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
6863 void Assembler::cmpxchg8(Address adr) {
6864   InstructionMark im(this);
6865   emit_int8(0x0F);
6866   emit_int8((unsigned char)0xC7);
6867   emit_operand(rcx, adr);
6868 }
6869 
6870 void Assembler::decl(Register dst) {
6871   // Don't use it directly. Use MacroAssembler::decrementl() instead.
6872  emit_int8(0x48 | dst->encoding());
6873 }
6874 
6875 #endif // _LP64
6876 
6877 // 64bit typically doesn't use the x87 but needs to for the trig funcs
6878 
6879 void Assembler::fabs() {
6880   emit_int8((unsigned char)0xD9);
6881   emit_int8((unsigned char)0xE1);
6882 }
6883 
6884 void Assembler::fadd(int i) {
6885   emit_farith(0xD8, 0xC0, i);
6886 }
6887 
6888 void Assembler::fadd_d(Address src) {
6889   InstructionMark im(this);
6890   emit_int8((unsigned char)0xDC);
6891   emit_operand32(rax, src);
6892 }
6893 
6894 void Assembler::fadd_s(Address src) {
6895   InstructionMark im(this);
6896   emit_int8((unsigned char)0xD8);
6897   emit_operand32(rax, src);
6898 }
6899 
6900 void Assembler::fadda(int i) {
6901   emit_farith(0xDC, 0xC0, i);
6902 }
6903 
6904 void Assembler::faddp(int i) {
6905   emit_farith(0xDE, 0xC0, i);
6906 }
6907 
6908 void Assembler::fchs() {
6909   emit_int8((unsigned char)0xD9);
6910   emit_int8((unsigned char)0xE0);
6911 }
6912 
6913 void Assembler::fcom(int i) {
6914   emit_farith(0xD8, 0xD0, i);
6915 }
6916 
6917 void Assembler::fcomp(int i) {
6918   emit_farith(0xD8, 0xD8, i);
6919 }
6920 
6921 void Assembler::fcomp_d(Address src) {
6922   InstructionMark im(this);
6923   emit_int8((unsigned char)0xDC);
6924   emit_operand32(rbx, src);
6925 }
6926 
6927 void Assembler::fcomp_s(Address src) {
6928   InstructionMark im(this);
6929   emit_int8((unsigned char)0xD8);
6930   emit_operand32(rbx, src);
6931 }
6932 
6933 void Assembler::fcompp() {
6934   emit_int8((unsigned char)0xDE);
6935   emit_int8((unsigned char)0xD9);
6936 }
6937 
6938 void Assembler::fcos() {
6939   emit_int8((unsigned char)0xD9);
6940   emit_int8((unsigned char)0xFF);
6941 }
6942 
6943 void Assembler::fdecstp() {
6944   emit_int8((unsigned char)0xD9);
6945   emit_int8((unsigned char)0xF6);
6946 }
6947 
6948 void Assembler::fdiv(int i) {
6949   emit_farith(0xD8, 0xF0, i);
6950 }
6951 
6952 void Assembler::fdiv_d(Address src) {
6953   InstructionMark im(this);
6954   emit_int8((unsigned char)0xDC);
6955   emit_operand32(rsi, src);
6956 }
6957 
6958 void Assembler::fdiv_s(Address src) {
6959   InstructionMark im(this);
6960   emit_int8((unsigned char)0xD8);
6961   emit_operand32(rsi, src);
6962 }
6963 
6964 void Assembler::fdiva(int i) {
6965   emit_farith(0xDC, 0xF8, i);
6966 }
6967 
6968 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
6969 //       is erroneous for some of the floating-point instructions below.
6970 
6971 void Assembler::fdivp(int i) {
6972   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
6973 }
6974 
6975 void Assembler::fdivr(int i) {
6976   emit_farith(0xD8, 0xF8, i);
6977 }
6978 
6979 void Assembler::fdivr_d(Address src) {
6980   InstructionMark im(this);
6981   emit_int8((unsigned char)0xDC);
6982   emit_operand32(rdi, src);
6983 }
6984 
6985 void Assembler::fdivr_s(Address src) {
6986   InstructionMark im(this);
6987   emit_int8((unsigned char)0xD8);
6988   emit_operand32(rdi, src);
6989 }
6990 
6991 void Assembler::fdivra(int i) {
6992   emit_farith(0xDC, 0xF0, i);
6993 }
6994 
6995 void Assembler::fdivrp(int i) {
6996   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
6997 }
6998 
6999 void Assembler::ffree(int i) {
7000   emit_farith(0xDD, 0xC0, i);
7001 }
7002 
7003 void Assembler::fild_d(Address adr) {
7004   InstructionMark im(this);
7005   emit_int8((unsigned char)0xDF);
7006   emit_operand32(rbp, adr);
7007 }
7008 
7009 void Assembler::fild_s(Address adr) {
7010   InstructionMark im(this);
7011   emit_int8((unsigned char)0xDB);
7012   emit_operand32(rax, adr);
7013 }
7014 
7015 void Assembler::fincstp() {
7016   emit_int8((unsigned char)0xD9);
7017   emit_int8((unsigned char)0xF7);
7018 }
7019 
7020 void Assembler::finit() {
7021   emit_int8((unsigned char)0x9B);
7022   emit_int8((unsigned char)0xDB);
7023   emit_int8((unsigned char)0xE3);
7024 }
7025 
7026 void Assembler::fist_s(Address adr) {
7027   InstructionMark im(this);
7028   emit_int8((unsigned char)0xDB);
7029   emit_operand32(rdx, adr);
7030 }
7031 
7032 void Assembler::fistp_d(Address adr) {
7033   InstructionMark im(this);
7034   emit_int8((unsigned char)0xDF);
7035   emit_operand32(rdi, adr);
7036 }
7037 
7038 void Assembler::fistp_s(Address adr) {
7039   InstructionMark im(this);
7040   emit_int8((unsigned char)0xDB);
7041   emit_operand32(rbx, adr);
7042 }
7043 
7044 void Assembler::fld1() {
7045   emit_int8((unsigned char)0xD9);
7046   emit_int8((unsigned char)0xE8);
7047 }
7048 
7049 void Assembler::fld_d(Address adr) {
7050   InstructionMark im(this);
7051   emit_int8((unsigned char)0xDD);
7052   emit_operand32(rax, adr);
7053 }
7054 
7055 void Assembler::fld_s(Address adr) {
7056   InstructionMark im(this);
7057   emit_int8((unsigned char)0xD9);
7058   emit_operand32(rax, adr);
7059 }
7060 
7061 
7062 void Assembler::fld_s(int index) {
7063   emit_farith(0xD9, 0xC0, index);
7064 }
7065 
7066 void Assembler::fld_x(Address adr) {
7067   InstructionMark im(this);
7068   emit_int8((unsigned char)0xDB);
7069   emit_operand32(rbp, adr);
7070 }
7071 
7072 void Assembler::fldcw(Address src) {
7073   InstructionMark im(this);
7074   emit_int8((unsigned char)0xD9);
7075   emit_operand32(rbp, src);
7076 }
7077 
7078 void Assembler::fldenv(Address src) {
7079   InstructionMark im(this);
7080   emit_int8((unsigned char)0xD9);
7081   emit_operand32(rsp, src);
7082 }
7083 
7084 void Assembler::fldlg2() {
7085   emit_int8((unsigned char)0xD9);
7086   emit_int8((unsigned char)0xEC);
7087 }
7088 
7089 void Assembler::fldln2() {
7090   emit_int8((unsigned char)0xD9);
7091   emit_int8((unsigned char)0xED);
7092 }
7093 
7094 void Assembler::fldz() {
7095   emit_int8((unsigned char)0xD9);
7096   emit_int8((unsigned char)0xEE);
7097 }
7098 
7099 void Assembler::flog() {
7100   fldln2();
7101   fxch();
7102   fyl2x();
7103 }
7104 
7105 void Assembler::flog10() {
7106   fldlg2();
7107   fxch();
7108   fyl2x();
7109 }
7110 
7111 void Assembler::fmul(int i) {
7112   emit_farith(0xD8, 0xC8, i);
7113 }
7114 
7115 void Assembler::fmul_d(Address src) {
7116   InstructionMark im(this);
7117   emit_int8((unsigned char)0xDC);
7118   emit_operand32(rcx, src);
7119 }
7120 
7121 void Assembler::fmul_s(Address src) {
7122   InstructionMark im(this);
7123   emit_int8((unsigned char)0xD8);
7124   emit_operand32(rcx, src);
7125 }
7126 
7127 void Assembler::fmula(int i) {
7128   emit_farith(0xDC, 0xC8, i);
7129 }
7130 
7131 void Assembler::fmulp(int i) {
7132   emit_farith(0xDE, 0xC8, i);
7133 }
7134 
7135 void Assembler::fnsave(Address dst) {
7136   InstructionMark im(this);
7137   emit_int8((unsigned char)0xDD);
7138   emit_operand32(rsi, dst);
7139 }
7140 
7141 void Assembler::fnstcw(Address src) {
7142   InstructionMark im(this);
7143   emit_int8((unsigned char)0x9B);
7144   emit_int8((unsigned char)0xD9);
7145   emit_operand32(rdi, src);
7146 }
7147 
7148 void Assembler::fnstsw_ax() {
7149   emit_int8((unsigned char)0xDF);
7150   emit_int8((unsigned char)0xE0);
7151 }
7152 
7153 void Assembler::fprem() {
7154   emit_int8((unsigned char)0xD9);
7155   emit_int8((unsigned char)0xF8);
7156 }
7157 
7158 void Assembler::fprem1() {
7159   emit_int8((unsigned char)0xD9);
7160   emit_int8((unsigned char)0xF5);
7161 }
7162 
7163 void Assembler::frstor(Address src) {
7164   InstructionMark im(this);
7165   emit_int8((unsigned char)0xDD);
7166   emit_operand32(rsp, src);
7167 }
7168 
7169 void Assembler::fsin() {
7170   emit_int8((unsigned char)0xD9);
7171   emit_int8((unsigned char)0xFE);
7172 }
7173 
7174 void Assembler::fsqrt() {
7175   emit_int8((unsigned char)0xD9);
7176   emit_int8((unsigned char)0xFA);
7177 }
7178 
7179 void Assembler::fst_d(Address adr) {
7180   InstructionMark im(this);
7181   emit_int8((unsigned char)0xDD);
7182   emit_operand32(rdx, adr);
7183 }
7184 
7185 void Assembler::fst_s(Address adr) {
7186   InstructionMark im(this);
7187   emit_int8((unsigned char)0xD9);
7188   emit_operand32(rdx, adr);
7189 }
7190 
7191 void Assembler::fstp_d(Address adr) {
7192   InstructionMark im(this);
7193   emit_int8((unsigned char)0xDD);
7194   emit_operand32(rbx, adr);
7195 }
7196 
7197 void Assembler::fstp_d(int index) {
7198   emit_farith(0xDD, 0xD8, index);
7199 }
7200 
7201 void Assembler::fstp_s(Address adr) {
7202   InstructionMark im(this);
7203   emit_int8((unsigned char)0xD9);
7204   emit_operand32(rbx, adr);
7205 }
7206 
7207 void Assembler::fstp_x(Address adr) {
7208   InstructionMark im(this);
7209   emit_int8((unsigned char)0xDB);
7210   emit_operand32(rdi, adr);
7211 }
7212 
7213 void Assembler::fsub(int i) {
7214   emit_farith(0xD8, 0xE0, i);
7215 }
7216 
7217 void Assembler::fsub_d(Address src) {
7218   InstructionMark im(this);
7219   emit_int8((unsigned char)0xDC);
7220   emit_operand32(rsp, src);
7221 }
7222 
7223 void Assembler::fsub_s(Address src) {
7224   InstructionMark im(this);
7225   emit_int8((unsigned char)0xD8);
7226   emit_operand32(rsp, src);
7227 }
7228 
7229 void Assembler::fsuba(int i) {
7230   emit_farith(0xDC, 0xE8, i);
7231 }
7232 
7233 void Assembler::fsubp(int i) {
7234   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
7235 }
7236 
7237 void Assembler::fsubr(int i) {
7238   emit_farith(0xD8, 0xE8, i);
7239 }
7240 
7241 void Assembler::fsubr_d(Address src) {
7242   InstructionMark im(this);
7243   emit_int8((unsigned char)0xDC);
7244   emit_operand32(rbp, src);
7245 }
7246 
7247 void Assembler::fsubr_s(Address src) {
7248   InstructionMark im(this);
7249   emit_int8((unsigned char)0xD8);
7250   emit_operand32(rbp, src);
7251 }
7252 
7253 void Assembler::fsubra(int i) {
7254   emit_farith(0xDC, 0xE0, i);
7255 }
7256 
7257 void Assembler::fsubrp(int i) {
7258   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
7259 }
7260 
7261 void Assembler::ftan() {
7262   emit_int8((unsigned char)0xD9);
7263   emit_int8((unsigned char)0xF2);
7264   emit_int8((unsigned char)0xDD);
7265   emit_int8((unsigned char)0xD8);
7266 }
7267 
7268 void Assembler::ftst() {
7269   emit_int8((unsigned char)0xD9);
7270   emit_int8((unsigned char)0xE4);
7271 }
7272 
7273 void Assembler::fucomi(int i) {
7274   // make sure the instruction is supported (introduced for P6, together with cmov)
7275   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7276   emit_farith(0xDB, 0xE8, i);
7277 }
7278 
7279 void Assembler::fucomip(int i) {
7280   // make sure the instruction is supported (introduced for P6, together with cmov)
7281   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7282   emit_farith(0xDF, 0xE8, i);
7283 }
7284 
7285 void Assembler::fwait() {
7286   emit_int8((unsigned char)0x9B);
7287 }
7288 
7289 void Assembler::fxch(int i) {
7290   emit_farith(0xD9, 0xC8, i);
7291 }
7292 
7293 void Assembler::fyl2x() {
7294   emit_int8((unsigned char)0xD9);
7295   emit_int8((unsigned char)0xF1);
7296 }
7297 
7298 void Assembler::frndint() {
7299   emit_int8((unsigned char)0xD9);
7300   emit_int8((unsigned char)0xFC);
7301 }
7302 
7303 void Assembler::f2xm1() {
7304   emit_int8((unsigned char)0xD9);
7305   emit_int8((unsigned char)0xF0);
7306 }
7307 
7308 void Assembler::fldl2e() {
7309   emit_int8((unsigned char)0xD9);
7310   emit_int8((unsigned char)0xEA);
7311 }
7312 
7313 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
7314 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
7315 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
7316 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
7317 
7318 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
7319 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7320   if (pre > 0) {
7321     emit_int8(simd_pre[pre]);
7322   }
7323   if (rex_w) {
7324     prefixq(adr, xreg);
7325   } else {
7326     prefix(adr, xreg);
7327   }
7328   if (opc > 0) {
7329     emit_int8(0x0F);
7330     int opc2 = simd_opc[opc];
7331     if (opc2 > 0) {
7332       emit_int8(opc2);
7333     }
7334   }
7335 }
7336 
7337 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7338   if (pre > 0) {
7339     emit_int8(simd_pre[pre]);
7340   }
7341   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
7342   if (opc > 0) {
7343     emit_int8(0x0F);
7344     int opc2 = simd_opc[opc];
7345     if (opc2 > 0) {
7346       emit_int8(opc2);
7347     }
7348   }
7349   return encode;
7350 }
7351 
7352 
7353 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
7354   int vector_len = _attributes->get_vector_len();
7355   bool vex_w = _attributes->is_rex_vex_w();
7356   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
7357     prefix(VEX_3bytes);
7358 
7359     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
7360     byte1 = (~byte1) & 0xE0;
7361     byte1 |= opc;
7362     emit_int8(byte1);
7363 
7364     int byte2 = ((~nds_enc) & 0xf) << 3;
7365     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
7366     emit_int8(byte2);
7367   } else {
7368     prefix(VEX_2bytes);
7369 
7370     int byte1 = vex_r ? VEX_R : 0;
7371     byte1 = (~byte1) & 0x80;
7372     byte1 |= ((~nds_enc) & 0xf) << 3;
7373     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
7374     emit_int8(byte1);
7375   }
7376 }
7377 
7378 // This is a 4 byte encoding
7379 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
7380   // EVEX 0x62 prefix
7381   prefix(EVEX_4bytes);
7382   bool vex_w = _attributes->is_rex_vex_w();
7383   int evex_encoding = (vex_w ? VEX_W : 0);
7384   // EVEX.b is not currently used for broadcast of single element or data rounding modes
7385   _attributes->set_evex_encoding(evex_encoding);
7386 
7387   // P0: byte 2, initialized to RXBR`00mm
7388   // instead of not'd
7389   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
7390   byte2 = (~byte2) & 0xF0;
7391   // confine opc opcode extensions in mm bits to lower two bits
7392   // of form {0F, 0F_38, 0F_3A}
7393   byte2 |= opc;
7394   emit_int8(byte2);
7395 
7396   // P1: byte 3 as Wvvvv1pp
7397   int byte3 = ((~nds_enc) & 0xf) << 3;
7398   // p[10] is always 1
7399   byte3 |= EVEX_F;
7400   byte3 |= (vex_w & 1) << 7;
7401   // confine pre opcode extensions in pp bits to lower two bits
7402   // of form {66, F3, F2}
7403   byte3 |= pre;
7404   emit_int8(byte3);
7405 
7406   // P2: byte 4 as zL'Lbv'aaa
7407   // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
7408   int byte4 = (_attributes->is_no_reg_mask()) ?
7409               0 :
7410               _attributes->get_embedded_opmask_register_specifier();
7411   // EVEX.v` for extending EVEX.vvvv or VIDX
7412   byte4 |= (evex_v ? 0: EVEX_V);
7413   // third EXEC.b for broadcast actions
7414   byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
7415   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
7416   byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
7417   // last is EVEX.z for zero/merge actions
7418   if (_attributes->is_no_reg_mask() == false) {
7419     byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
7420   }
7421   emit_int8(byte4);
7422 }
7423 
7424 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7425   bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
7426   bool vex_b = adr.base_needs_rex();
7427   bool vex_x = adr.index_needs_rex();
7428   set_attributes(attributes);
7429   attributes->set_current_assembler(this);
7430 
7431   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7432   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7433     switch (attributes->get_vector_len()) {
7434     case AVX_128bit:
7435     case AVX_256bit:
7436       attributes->set_is_legacy_mode();
7437       break;
7438     }
7439   }
7440 
7441   // For pure EVEX check and see if this instruction
7442   // is allowed in legacy mode and has resources which will
7443   // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
7444   // else that field is set when we encode to EVEX
7445   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7446       !_is_managed && !attributes->is_evex_instruction()) {
7447     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7448       bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7449       if (check_register_bank) {
7450         // check nds_enc and xreg_enc for upper bank usage
7451         if (nds_enc < 16 && xreg_enc < 16) {
7452           attributes->set_is_legacy_mode();
7453         }
7454       } else {
7455         attributes->set_is_legacy_mode();
7456       }
7457     }
7458   }
7459 
7460   _is_managed = false;
7461   if (UseAVX > 2 && !attributes->is_legacy_mode())
7462   {
7463     bool evex_r = (xreg_enc >= 16);
7464     bool evex_v = (nds_enc >= 16);
7465     attributes->set_is_evex_instruction();
7466     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7467   } else {
7468     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7469       attributes->set_rex_vex_w(false);
7470     }
7471     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7472   }
7473 }
7474 
7475 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7476   bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
7477   bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
7478   bool vex_x = false;
7479   set_attributes(attributes);
7480   attributes->set_current_assembler(this);
7481   bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7482 
7483   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7484   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7485     switch (attributes->get_vector_len()) {
7486     case AVX_128bit:
7487     case AVX_256bit:
7488       if (check_register_bank) {
7489         if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
7490           // up propagate arithmetic instructions to meet RA requirements
7491           attributes->set_vector_len(AVX_512bit);
7492         } else {
7493           attributes->set_is_legacy_mode();
7494         }
7495       } else {
7496         attributes->set_is_legacy_mode();
7497       }
7498       break;
7499     }
7500   }
7501 
7502   // For pure EVEX check and see if this instruction
7503   // is allowed in legacy mode and has resources which will
7504   // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
7505   // else that field is set when we encode to EVEX
7506   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7507       !_is_managed && !attributes->is_evex_instruction()) {
7508     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7509       if (check_register_bank) {
7510         // check dst_enc, nds_enc and src_enc for upper bank usage
7511         if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
7512           attributes->set_is_legacy_mode();
7513         }
7514       } else {
7515         attributes->set_is_legacy_mode();
7516       }
7517     }
7518   }
7519 
7520   _is_managed = false;
7521   if (UseAVX > 2 && !attributes->is_legacy_mode())
7522   {
7523     bool evex_r = (dst_enc >= 16);
7524     bool evex_v = (nds_enc >= 16);
7525     // can use vex_x as bank extender on rm encoding
7526     vex_x = (src_enc >= 16);
7527     attributes->set_is_evex_instruction();
7528     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7529   } else {
7530     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7531       attributes->set_rex_vex_w(false);
7532     }
7533     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7534   }
7535 
7536   // return modrm byte components for operands
7537   return (((dst_enc & 7) << 3) | (src_enc & 7));
7538 }
7539 
7540 
7541 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
7542                             VexOpcode opc, InstructionAttr *attributes) {
7543   if (UseAVX > 0) {
7544     int xreg_enc = xreg->encoding();
7545     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7546     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
7547   } else {
7548     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
7549     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
7550   }
7551 }
7552 
7553 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
7554                                       VexOpcode opc, InstructionAttr *attributes) {
7555   int dst_enc = dst->encoding();
7556   int src_enc = src->encoding();
7557   if (UseAVX > 0) {
7558     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7559     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
7560   } else {
7561     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
7562     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
7563   }
7564 }
7565 
7566 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7567   assert(VM_Version::supports_avx(), "");
7568   assert(!VM_Version::supports_evex(), "");
7569   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7570   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7571   emit_int8((unsigned char)0xC2);
7572   emit_int8((unsigned char)(0xC0 | encode));
7573   emit_int8((unsigned char)(0xF & cop));
7574 }
7575 
7576 void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7577   assert(VM_Version::supports_avx(), "");
7578   assert(!VM_Version::supports_evex(), "");
7579   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7580   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7581   emit_int8((unsigned char)0x4B);
7582   emit_int8((unsigned char)(0xC0 | encode));
7583   int src2_enc = src2->encoding();
7584   emit_int8((unsigned char)(0xF0 & src2_enc<<4));
7585 }
7586 
7587 void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7588   assert(VM_Version::supports_avx(), "");
7589   assert(!VM_Version::supports_evex(), "");
7590   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7591   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
7592   emit_int8((unsigned char)0xC2);
7593   emit_int8((unsigned char)(0xC0 | encode));
7594   emit_int8((unsigned char)(0xF & cop));
7595 }
7596 
7597 void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7598   assert(VM_Version::supports_avx(), "");
7599   assert(!VM_Version::supports_evex(), "");
7600   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7601   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7602   emit_int8((unsigned char)0x4A);
7603   emit_int8((unsigned char)(0xC0 | encode));
7604   int src2_enc = src2->encoding();
7605   emit_int8((unsigned char)(0xF0 & src2_enc<<4));
7606 }
7607 
7608 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
7609   assert(VM_Version::supports_avx2(), "");
7610   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7611   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7612   emit_int8((unsigned char)0x02);
7613   emit_int8((unsigned char)(0xC0 | encode));
7614   emit_int8((unsigned char)imm8);
7615 }
7616 
7617 void Assembler::shlxl(Register dst, Register src1, Register src2) {
7618   assert(VM_Version::supports_bmi2(), "");
7619   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7620   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7621   emit_int8((unsigned char)0xF7);
7622   emit_int8((unsigned char)(0xC0 | encode));
7623 }
7624 
7625 void Assembler::shlxq(Register dst, Register src1, Register src2) {
7626   assert(VM_Version::supports_bmi2(), "");
7627   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7628   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7629   emit_int8((unsigned char)0xF7);
7630   emit_int8((unsigned char)(0xC0 | encode));
7631 }
7632 
7633 #ifndef _LP64
7634 
7635 void Assembler::incl(Register dst) {
7636   // Don't use it directly. Use MacroAssembler::incrementl() instead.
7637   emit_int8(0x40 | dst->encoding());
7638 }
7639 
7640 void Assembler::lea(Register dst, Address src) {
7641   leal(dst, src);
7642 }
7643 
7644 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
7645   InstructionMark im(this);
7646   emit_int8((unsigned char)0xC7);
7647   emit_operand(rax, dst);
7648   emit_data((int)imm32, rspec, 0);
7649 }
7650 
7651 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7652   InstructionMark im(this);
7653   int encode = prefix_and_encode(dst->encoding());
7654   emit_int8((unsigned char)(0xB8 | encode));
7655   emit_data((int)imm32, rspec, 0);
7656 }
7657 
7658 void Assembler::popa() { // 32bit
7659   emit_int8(0x61);
7660 }
7661 
7662 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
7663   InstructionMark im(this);
7664   emit_int8(0x68);
7665   emit_data(imm32, rspec, 0);
7666 }
7667 
7668 void Assembler::pusha() { // 32bit
7669   emit_int8(0x60);
7670 }
7671 
7672 void Assembler::set_byte_if_not_zero(Register dst) {
7673   emit_int8(0x0F);
7674   emit_int8((unsigned char)0x95);
7675   emit_int8((unsigned char)(0xE0 | dst->encoding()));
7676 }
7677 
7678 void Assembler::shldl(Register dst, Register src) {
7679   emit_int8(0x0F);
7680   emit_int8((unsigned char)0xA5);
7681   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7682 }
7683 
7684 // 0F A4 / r ib
7685 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
7686   emit_int8(0x0F);
7687   emit_int8((unsigned char)0xA4);
7688   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7689   emit_int8(imm8);
7690 }
7691 
7692 void Assembler::shrdl(Register dst, Register src) {
7693   emit_int8(0x0F);
7694   emit_int8((unsigned char)0xAD);
7695   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7696 }
7697 
7698 #else // LP64
7699 
7700 void Assembler::set_byte_if_not_zero(Register dst) {
7701   int enc = prefix_and_encode(dst->encoding(), true);
7702   emit_int8(0x0F);
7703   emit_int8((unsigned char)0x95);
7704   emit_int8((unsigned char)(0xE0 | enc));
7705 }
7706 
7707 // 64bit only pieces of the assembler
7708 // This should only be used by 64bit instructions that can use rip-relative
7709 // it cannot be used by instructions that want an immediate value.
7710 
7711 bool Assembler::reachable(AddressLiteral adr) {
7712   int64_t disp;
7713   // None will force a 64bit literal to the code stream. Likely a placeholder
7714   // for something that will be patched later and we need to certain it will
7715   // always be reachable.
7716   if (adr.reloc() == relocInfo::none) {
7717     return false;
7718   }
7719   if (adr.reloc() == relocInfo::internal_word_type) {
7720     // This should be rip relative and easily reachable.
7721     return true;
7722   }
7723   if (adr.reloc() == relocInfo::virtual_call_type ||
7724       adr.reloc() == relocInfo::opt_virtual_call_type ||
7725       adr.reloc() == relocInfo::static_call_type ||
7726       adr.reloc() == relocInfo::static_stub_type ) {
7727     // This should be rip relative within the code cache and easily
7728     // reachable until we get huge code caches. (At which point
7729     // ic code is going to have issues).
7730     return true;
7731   }
7732   if (adr.reloc() != relocInfo::external_word_type &&
7733       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
7734       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
7735       adr.reloc() != relocInfo::runtime_call_type ) {
7736     return false;
7737   }
7738 
7739   // Stress the correction code
7740   if (ForceUnreachable) {
7741     // Must be runtimecall reloc, see if it is in the codecache
7742     // Flipping stuff in the codecache to be unreachable causes issues
7743     // with things like inline caches where the additional instructions
7744     // are not handled.
7745     if (CodeCache::find_blob(adr._target) == NULL) {
7746       return false;
7747     }
7748   }
7749   // For external_word_type/runtime_call_type if it is reachable from where we
7750   // are now (possibly a temp buffer) and where we might end up
7751   // anywhere in the codeCache then we are always reachable.
7752   // This would have to change if we ever save/restore shared code
7753   // to be more pessimistic.
7754   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
7755   if (!is_simm32(disp)) return false;
7756   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
7757   if (!is_simm32(disp)) return false;
7758 
7759   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
7760 
7761   // Because rip relative is a disp + address_of_next_instruction and we
7762   // don't know the value of address_of_next_instruction we apply a fudge factor
7763   // to make sure we will be ok no matter the size of the instruction we get placed into.
7764   // We don't have to fudge the checks above here because they are already worst case.
7765 
7766   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
7767   // + 4 because better safe than sorry.
7768   const int fudge = 12 + 4;
7769   if (disp < 0) {
7770     disp -= fudge;
7771   } else {
7772     disp += fudge;
7773   }
7774   return is_simm32(disp);
7775 }
7776 
7777 // Check if the polling page is not reachable from the code cache using rip-relative
7778 // addressing.
7779 bool Assembler::is_polling_page_far() {
7780   intptr_t addr = (intptr_t)os::get_polling_page();
7781   return ForceUnreachable ||
7782          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
7783          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
7784 }
7785 
7786 void Assembler::emit_data64(jlong data,
7787                             relocInfo::relocType rtype,
7788                             int format) {
7789   if (rtype == relocInfo::none) {
7790     emit_int64(data);
7791   } else {
7792     emit_data64(data, Relocation::spec_simple(rtype), format);
7793   }
7794 }
7795 
7796 void Assembler::emit_data64(jlong data,
7797                             RelocationHolder const& rspec,
7798                             int format) {
7799   assert(imm_operand == 0, "default format must be immediate in this file");
7800   assert(imm_operand == format, "must be immediate");
7801   assert(inst_mark() != NULL, "must be inside InstructionMark");
7802   // Do not use AbstractAssembler::relocate, which is not intended for
7803   // embedded words.  Instead, relocate to the enclosing instruction.
7804   code_section()->relocate(inst_mark(), rspec, format);
7805 #ifdef ASSERT
7806   check_relocation(rspec, format);
7807 #endif
7808   emit_int64(data);
7809 }
7810 
7811 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
7812   if (reg_enc >= 8) {
7813     prefix(REX_B);
7814     reg_enc -= 8;
7815   } else if (byteinst && reg_enc >= 4) {
7816     prefix(REX);
7817   }
7818   return reg_enc;
7819 }
7820 
7821 int Assembler::prefixq_and_encode(int reg_enc) {
7822   if (reg_enc < 8) {
7823     prefix(REX_W);
7824   } else {
7825     prefix(REX_WB);
7826     reg_enc -= 8;
7827   }
7828   return reg_enc;
7829 }
7830 
7831 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
7832   if (dst_enc < 8) {
7833     if (src_enc >= 8) {
7834       prefix(REX_B);
7835       src_enc -= 8;
7836     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
7837       prefix(REX);
7838     }
7839   } else {
7840     if (src_enc < 8) {
7841       prefix(REX_R);
7842     } else {
7843       prefix(REX_RB);
7844       src_enc -= 8;
7845     }
7846     dst_enc -= 8;
7847   }
7848   return dst_enc << 3 | src_enc;
7849 }
7850 
7851 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
7852   if (dst_enc < 8) {
7853     if (src_enc < 8) {
7854       prefix(REX_W);
7855     } else {
7856       prefix(REX_WB);
7857       src_enc -= 8;
7858     }
7859   } else {
7860     if (src_enc < 8) {
7861       prefix(REX_WR);
7862     } else {
7863       prefix(REX_WRB);
7864       src_enc -= 8;
7865     }
7866     dst_enc -= 8;
7867   }
7868   return dst_enc << 3 | src_enc;
7869 }
7870 
7871 void Assembler::prefix(Register reg) {
7872   if (reg->encoding() >= 8) {
7873     prefix(REX_B);
7874   }
7875 }
7876 
7877 void Assembler::prefix(Register dst, Register src, Prefix p) {
7878   if (src->encoding() >= 8) {
7879     p = (Prefix)(p | REX_B);
7880   }
7881   if (dst->encoding() >= 8) {
7882     p = (Prefix)( p | REX_R);
7883   }
7884   if (p != Prefix_EMPTY) {
7885     // do not generate an empty prefix
7886     prefix(p);
7887   }
7888 }
7889 
7890 void Assembler::prefix(Register dst, Address adr, Prefix p) {
7891   if (adr.base_needs_rex()) {
7892     if (adr.index_needs_rex()) {
7893       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7894     } else {
7895       prefix(REX_B);
7896     }
7897   } else {
7898     if (adr.index_needs_rex()) {
7899       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7900     }
7901   }
7902   if (dst->encoding() >= 8) {
7903     p = (Prefix)(p | REX_R);
7904   }
7905   if (p != Prefix_EMPTY) {
7906     // do not generate an empty prefix
7907     prefix(p);
7908   }
7909 }
7910 
7911 void Assembler::prefix(Address adr) {
7912   if (adr.base_needs_rex()) {
7913     if (adr.index_needs_rex()) {
7914       prefix(REX_XB);
7915     } else {
7916       prefix(REX_B);
7917     }
7918   } else {
7919     if (adr.index_needs_rex()) {
7920       prefix(REX_X);
7921     }
7922   }
7923 }
7924 
7925 void Assembler::prefixq(Address adr) {
7926   if (adr.base_needs_rex()) {
7927     if (adr.index_needs_rex()) {
7928       prefix(REX_WXB);
7929     } else {
7930       prefix(REX_WB);
7931     }
7932   } else {
7933     if (adr.index_needs_rex()) {
7934       prefix(REX_WX);
7935     } else {
7936       prefix(REX_W);
7937     }
7938   }
7939 }
7940 
7941 
7942 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
7943   if (reg->encoding() < 8) {
7944     if (adr.base_needs_rex()) {
7945       if (adr.index_needs_rex()) {
7946         prefix(REX_XB);
7947       } else {
7948         prefix(REX_B);
7949       }
7950     } else {
7951       if (adr.index_needs_rex()) {
7952         prefix(REX_X);
7953       } else if (byteinst && reg->encoding() >= 4 ) {
7954         prefix(REX);
7955       }
7956     }
7957   } else {
7958     if (adr.base_needs_rex()) {
7959       if (adr.index_needs_rex()) {
7960         prefix(REX_RXB);
7961       } else {
7962         prefix(REX_RB);
7963       }
7964     } else {
7965       if (adr.index_needs_rex()) {
7966         prefix(REX_RX);
7967       } else {
7968         prefix(REX_R);
7969       }
7970     }
7971   }
7972 }
7973 
7974 void Assembler::prefixq(Address adr, Register src) {
7975   if (src->encoding() < 8) {
7976     if (adr.base_needs_rex()) {
7977       if (adr.index_needs_rex()) {
7978         prefix(REX_WXB);
7979       } else {
7980         prefix(REX_WB);
7981       }
7982     } else {
7983       if (adr.index_needs_rex()) {
7984         prefix(REX_WX);
7985       } else {
7986         prefix(REX_W);
7987       }
7988     }
7989   } else {
7990     if (adr.base_needs_rex()) {
7991       if (adr.index_needs_rex()) {
7992         prefix(REX_WRXB);
7993       } else {
7994         prefix(REX_WRB);
7995       }
7996     } else {
7997       if (adr.index_needs_rex()) {
7998         prefix(REX_WRX);
7999       } else {
8000         prefix(REX_WR);
8001       }
8002     }
8003   }
8004 }
8005 
8006 void Assembler::prefix(Address adr, XMMRegister reg) {
8007   if (reg->encoding() < 8) {
8008     if (adr.base_needs_rex()) {
8009       if (adr.index_needs_rex()) {
8010         prefix(REX_XB);
8011       } else {
8012         prefix(REX_B);
8013       }
8014     } else {
8015       if (adr.index_needs_rex()) {
8016         prefix(REX_X);
8017       }
8018     }
8019   } else {
8020     if (adr.base_needs_rex()) {
8021       if (adr.index_needs_rex()) {
8022         prefix(REX_RXB);
8023       } else {
8024         prefix(REX_RB);
8025       }
8026     } else {
8027       if (adr.index_needs_rex()) {
8028         prefix(REX_RX);
8029       } else {
8030         prefix(REX_R);
8031       }
8032     }
8033   }
8034 }
8035 
8036 void Assembler::prefixq(Address adr, XMMRegister src) {
8037   if (src->encoding() < 8) {
8038     if (adr.base_needs_rex()) {
8039       if (adr.index_needs_rex()) {
8040         prefix(REX_WXB);
8041       } else {
8042         prefix(REX_WB);
8043       }
8044     } else {
8045       if (adr.index_needs_rex()) {
8046         prefix(REX_WX);
8047       } else {
8048         prefix(REX_W);
8049       }
8050     }
8051   } else {
8052     if (adr.base_needs_rex()) {
8053       if (adr.index_needs_rex()) {
8054         prefix(REX_WRXB);
8055       } else {
8056         prefix(REX_WRB);
8057       }
8058     } else {
8059       if (adr.index_needs_rex()) {
8060         prefix(REX_WRX);
8061       } else {
8062         prefix(REX_WR);
8063       }
8064     }
8065   }
8066 }
8067 
8068 void Assembler::adcq(Register dst, int32_t imm32) {
8069   (void) prefixq_and_encode(dst->encoding());
8070   emit_arith(0x81, 0xD0, dst, imm32);
8071 }
8072 
8073 void Assembler::adcq(Register dst, Address src) {
8074   InstructionMark im(this);
8075   prefixq(src, dst);
8076   emit_int8(0x13);
8077   emit_operand(dst, src);
8078 }
8079 
8080 void Assembler::adcq(Register dst, Register src) {
8081   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8082   emit_arith(0x13, 0xC0, dst, src);
8083 }
8084 
8085 void Assembler::addq(Address dst, int32_t imm32) {
8086   InstructionMark im(this);
8087   prefixq(dst);
8088   emit_arith_operand(0x81, rax, dst,imm32);
8089 }
8090 
8091 void Assembler::addq(Address dst, Register src) {
8092   InstructionMark im(this);
8093   prefixq(dst, src);
8094   emit_int8(0x01);
8095   emit_operand(src, dst);
8096 }
8097 
8098 void Assembler::addq(Register dst, int32_t imm32) {
8099   (void) prefixq_and_encode(dst->encoding());
8100   emit_arith(0x81, 0xC0, dst, imm32);
8101 }
8102 
8103 void Assembler::addq(Register dst, Address src) {
8104   InstructionMark im(this);
8105   prefixq(src, dst);
8106   emit_int8(0x03);
8107   emit_operand(dst, src);
8108 }
8109 
8110 void Assembler::addq(Register dst, Register src) {
8111   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8112   emit_arith(0x03, 0xC0, dst, src);
8113 }
8114 
8115 void Assembler::adcxq(Register dst, Register src) {
8116   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8117   emit_int8((unsigned char)0x66);
8118   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8119   emit_int8(0x0F);
8120   emit_int8(0x38);
8121   emit_int8((unsigned char)0xF6);
8122   emit_int8((unsigned char)(0xC0 | encode));
8123 }
8124 
8125 void Assembler::adoxq(Register dst, Register src) {
8126   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8127   emit_int8((unsigned char)0xF3);
8128   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8129   emit_int8(0x0F);
8130   emit_int8(0x38);
8131   emit_int8((unsigned char)0xF6);
8132   emit_int8((unsigned char)(0xC0 | encode));
8133 }
8134 
8135 void Assembler::andq(Address dst, int32_t imm32) {
8136   InstructionMark im(this);
8137   prefixq(dst);
8138   emit_int8((unsigned char)0x81);
8139   emit_operand(rsp, dst, 4);
8140   emit_int32(imm32);
8141 }
8142 
8143 void Assembler::andq(Register dst, int32_t imm32) {
8144   (void) prefixq_and_encode(dst->encoding());
8145   emit_arith(0x81, 0xE0, dst, imm32);
8146 }
8147 
8148 void Assembler::andq(Register dst, Address src) {
8149   InstructionMark im(this);
8150   prefixq(src, dst);
8151   emit_int8(0x23);
8152   emit_operand(dst, src);
8153 }
8154 
8155 void Assembler::andq(Register dst, Register src) {
8156   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8157   emit_arith(0x23, 0xC0, dst, src);
8158 }
8159 
8160 void Assembler::andnq(Register dst, Register src1, Register src2) {
8161   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8162   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8163   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8164   emit_int8((unsigned char)0xF2);
8165   emit_int8((unsigned char)(0xC0 | encode));
8166 }
8167 
8168 void Assembler::andnq(Register dst, Register src1, Address src2) {
8169   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8170   InstructionMark im(this);
8171   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8172   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8173   emit_int8((unsigned char)0xF2);
8174   emit_operand(dst, src2);
8175 }
8176 
8177 void Assembler::bsfq(Register dst, Register src) {
8178   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8179   emit_int8(0x0F);
8180   emit_int8((unsigned char)0xBC);
8181   emit_int8((unsigned char)(0xC0 | encode));
8182 }
8183 
8184 void Assembler::bsrq(Register dst, Register src) {
8185   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8186   emit_int8(0x0F);
8187   emit_int8((unsigned char)0xBD);
8188   emit_int8((unsigned char)(0xC0 | encode));
8189 }
8190 
8191 void Assembler::bswapq(Register reg) {
8192   int encode = prefixq_and_encode(reg->encoding());
8193   emit_int8(0x0F);
8194   emit_int8((unsigned char)(0xC8 | encode));
8195 }
8196 
8197 void Assembler::blsiq(Register dst, Register src) {
8198   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8199   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8200   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8201   emit_int8((unsigned char)0xF3);
8202   emit_int8((unsigned char)(0xC0 | encode));
8203 }
8204 
8205 void Assembler::blsiq(Register dst, Address src) {
8206   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8207   InstructionMark im(this);
8208   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8209   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8210   emit_int8((unsigned char)0xF3);
8211   emit_operand(rbx, src);
8212 }
8213 
8214 void Assembler::blsmskq(Register dst, Register src) {
8215   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8216   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8217   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8218   emit_int8((unsigned char)0xF3);
8219   emit_int8((unsigned char)(0xC0 | encode));
8220 }
8221 
8222 void Assembler::blsmskq(Register dst, Address src) {
8223   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8224   InstructionMark im(this);
8225   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8226   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8227   emit_int8((unsigned char)0xF3);
8228   emit_operand(rdx, src);
8229 }
8230 
8231 void Assembler::blsrq(Register dst, Register src) {
8232   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8233   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8234   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8235   emit_int8((unsigned char)0xF3);
8236   emit_int8((unsigned char)(0xC0 | encode));
8237 }
8238 
8239 void Assembler::blsrq(Register dst, Address src) {
8240   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8241   InstructionMark im(this);
8242   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8243   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8244   emit_int8((unsigned char)0xF3);
8245   emit_operand(rcx, src);
8246 }
8247 
8248 void Assembler::cdqq() {
8249   prefix(REX_W);
8250   emit_int8((unsigned char)0x99);
8251 }
8252 
8253 void Assembler::clflush(Address adr) {
8254   prefix(adr);
8255   emit_int8(0x0F);
8256   emit_int8((unsigned char)0xAE);
8257   emit_operand(rdi, adr);
8258 }
8259 
8260 void Assembler::cmovq(Condition cc, Register dst, Register src) {
8261   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8262   emit_int8(0x0F);
8263   emit_int8(0x40 | cc);
8264   emit_int8((unsigned char)(0xC0 | encode));
8265 }
8266 
8267 void Assembler::cmovq(Condition cc, Register dst, Address src) {
8268   InstructionMark im(this);
8269   prefixq(src, dst);
8270   emit_int8(0x0F);
8271   emit_int8(0x40 | cc);
8272   emit_operand(dst, src);
8273 }
8274 
8275 void Assembler::cmpq(Address dst, int32_t imm32) {
8276   InstructionMark im(this);
8277   prefixq(dst);
8278   emit_int8((unsigned char)0x81);
8279   emit_operand(rdi, dst, 4);
8280   emit_int32(imm32);
8281 }
8282 
8283 void Assembler::cmpq(Register dst, int32_t imm32) {
8284   (void) prefixq_and_encode(dst->encoding());
8285   emit_arith(0x81, 0xF8, dst, imm32);
8286 }
8287 
8288 void Assembler::cmpq(Address dst, Register src) {
8289   InstructionMark im(this);
8290   prefixq(dst, src);
8291   emit_int8(0x3B);
8292   emit_operand(src, dst);
8293 }
8294 
8295 void Assembler::cmpq(Register dst, Register src) {
8296   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8297   emit_arith(0x3B, 0xC0, dst, src);
8298 }
8299 
8300 void Assembler::cmpq(Register dst, Address  src) {
8301   InstructionMark im(this);
8302   prefixq(src, dst);
8303   emit_int8(0x3B);
8304   emit_operand(dst, src);
8305 }
8306 
8307 void Assembler::cmpxchgq(Register reg, Address adr) {
8308   InstructionMark im(this);
8309   prefixq(adr, reg);
8310   emit_int8(0x0F);
8311   emit_int8((unsigned char)0xB1);
8312   emit_operand(reg, adr);
8313 }
8314 
8315 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
8316   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8317   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8318   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8319   emit_int8(0x2A);
8320   emit_int8((unsigned char)(0xC0 | encode));
8321 }
8322 
8323 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
8324   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8325   InstructionMark im(this);
8326   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8327   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8328   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8329   emit_int8(0x2A);
8330   emit_operand(dst, src);
8331 }
8332 
8333 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
8334   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8335   InstructionMark im(this);
8336   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8337   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8338   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8339   emit_int8(0x2A);
8340   emit_operand(dst, src);
8341 }
8342 
8343 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
8344   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8345   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8346   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8347   emit_int8(0x2C);
8348   emit_int8((unsigned char)(0xC0 | encode));
8349 }
8350 
8351 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
8352   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8353   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8354   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8355   emit_int8(0x2C);
8356   emit_int8((unsigned char)(0xC0 | encode));
8357 }
8358 
8359 void Assembler::decl(Register dst) {
8360   // Don't use it directly. Use MacroAssembler::decrementl() instead.
8361   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8362   int encode = prefix_and_encode(dst->encoding());
8363   emit_int8((unsigned char)0xFF);
8364   emit_int8((unsigned char)(0xC8 | encode));
8365 }
8366 
8367 void Assembler::decq(Register dst) {
8368   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8369   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8370   int encode = prefixq_and_encode(dst->encoding());
8371   emit_int8((unsigned char)0xFF);
8372   emit_int8(0xC8 | encode);
8373 }
8374 
8375 void Assembler::decq(Address dst) {
8376   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8377   InstructionMark im(this);
8378   prefixq(dst);
8379   emit_int8((unsigned char)0xFF);
8380   emit_operand(rcx, dst);
8381 }
8382 
8383 void Assembler::fxrstor(Address src) {
8384   prefixq(src);
8385   emit_int8(0x0F);
8386   emit_int8((unsigned char)0xAE);
8387   emit_operand(as_Register(1), src);
8388 }
8389 
8390 void Assembler::xrstor(Address src) {
8391   prefixq(src);
8392   emit_int8(0x0F);
8393   emit_int8((unsigned char)0xAE);
8394   emit_operand(as_Register(5), src);
8395 }
8396 
8397 void Assembler::fxsave(Address dst) {
8398   prefixq(dst);
8399   emit_int8(0x0F);
8400   emit_int8((unsigned char)0xAE);
8401   emit_operand(as_Register(0), dst);
8402 }
8403 
8404 void Assembler::xsave(Address dst) {
8405   prefixq(dst);
8406   emit_int8(0x0F);
8407   emit_int8((unsigned char)0xAE);
8408   emit_operand(as_Register(4), dst);
8409 }
8410 
8411 void Assembler::idivq(Register src) {
8412   int encode = prefixq_and_encode(src->encoding());
8413   emit_int8((unsigned char)0xF7);
8414   emit_int8((unsigned char)(0xF8 | encode));
8415 }
8416 
8417 void Assembler::imulq(Register dst, Register src) {
8418   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8419   emit_int8(0x0F);
8420   emit_int8((unsigned char)0xAF);
8421   emit_int8((unsigned char)(0xC0 | encode));
8422 }
8423 
8424 void Assembler::imulq(Register dst, Register src, int value) {
8425   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8426   if (is8bit(value)) {
8427     emit_int8(0x6B);
8428     emit_int8((unsigned char)(0xC0 | encode));
8429     emit_int8(value & 0xFF);
8430   } else {
8431     emit_int8(0x69);
8432     emit_int8((unsigned char)(0xC0 | encode));
8433     emit_int32(value);
8434   }
8435 }
8436 
8437 void Assembler::imulq(Register dst, Address src) {
8438   InstructionMark im(this);
8439   prefixq(src, dst);
8440   emit_int8(0x0F);
8441   emit_int8((unsigned char) 0xAF);
8442   emit_operand(dst, src);
8443 }
8444 
8445 void Assembler::incl(Register dst) {
8446   // Don't use it directly. Use MacroAssembler::incrementl() instead.
8447   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8448   int encode = prefix_and_encode(dst->encoding());
8449   emit_int8((unsigned char)0xFF);
8450   emit_int8((unsigned char)(0xC0 | encode));
8451 }
8452 
8453 void Assembler::incq(Register dst) {
8454   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8455   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8456   int encode = prefixq_and_encode(dst->encoding());
8457   emit_int8((unsigned char)0xFF);
8458   emit_int8((unsigned char)(0xC0 | encode));
8459 }
8460 
8461 void Assembler::incq(Address dst) {
8462   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8463   InstructionMark im(this);
8464   prefixq(dst);
8465   emit_int8((unsigned char)0xFF);
8466   emit_operand(rax, dst);
8467 }
8468 
8469 void Assembler::lea(Register dst, Address src) {
8470   leaq(dst, src);
8471 }
8472 
8473 void Assembler::leaq(Register dst, Address src) {
8474   InstructionMark im(this);
8475   prefixq(src, dst);
8476   emit_int8((unsigned char)0x8D);
8477   emit_operand(dst, src);
8478 }
8479 
8480 void Assembler::mov64(Register dst, int64_t imm64) {
8481   InstructionMark im(this);
8482   int encode = prefixq_and_encode(dst->encoding());
8483   emit_int8((unsigned char)(0xB8 | encode));
8484   emit_int64(imm64);
8485 }
8486 
8487 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
8488   InstructionMark im(this);
8489   int encode = prefixq_and_encode(dst->encoding());
8490   emit_int8(0xB8 | encode);
8491   emit_data64(imm64, rspec);
8492 }
8493 
8494 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
8495   InstructionMark im(this);
8496   int encode = prefix_and_encode(dst->encoding());
8497   emit_int8((unsigned char)(0xB8 | encode));
8498   emit_data((int)imm32, rspec, narrow_oop_operand);
8499 }
8500 
8501 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
8502   InstructionMark im(this);
8503   prefix(dst);
8504   emit_int8((unsigned char)0xC7);
8505   emit_operand(rax, dst, 4);
8506   emit_data((int)imm32, rspec, narrow_oop_operand);
8507 }
8508 
8509 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
8510   InstructionMark im(this);
8511   int encode = prefix_and_encode(src1->encoding());
8512   emit_int8((unsigned char)0x81);
8513   emit_int8((unsigned char)(0xF8 | encode));
8514   emit_data((int)imm32, rspec, narrow_oop_operand);
8515 }
8516 
8517 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
8518   InstructionMark im(this);
8519   prefix(src1);
8520   emit_int8((unsigned char)0x81);
8521   emit_operand(rax, src1, 4);
8522   emit_data((int)imm32, rspec, narrow_oop_operand);
8523 }
8524 
8525 void Assembler::lzcntq(Register dst, Register src) {
8526   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
8527   emit_int8((unsigned char)0xF3);
8528   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8529   emit_int8(0x0F);
8530   emit_int8((unsigned char)0xBD);
8531   emit_int8((unsigned char)(0xC0 | encode));
8532 }
8533 
8534 void Assembler::movdq(XMMRegister dst, Register src) {
8535   // table D-1 says MMX/SSE2
8536   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8537   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8538   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8539   emit_int8(0x6E);
8540   emit_int8((unsigned char)(0xC0 | encode));
8541 }
8542 
8543 void Assembler::movdq(Register dst, XMMRegister src) {
8544   // table D-1 says MMX/SSE2
8545   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8546   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8547   // swap src/dst to get correct prefix
8548   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8549   emit_int8(0x7E);
8550   emit_int8((unsigned char)(0xC0 | encode));
8551 }
8552 
8553 void Assembler::movq(Register dst, Register src) {
8554   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8555   emit_int8((unsigned char)0x8B);
8556   emit_int8((unsigned char)(0xC0 | encode));
8557 }
8558 
8559 void Assembler::movq(Register dst, Address src) {
8560   InstructionMark im(this);
8561   prefixq(src, dst);
8562   emit_int8((unsigned char)0x8B);
8563   emit_operand(dst, src);
8564 }
8565 
8566 void Assembler::movq(Address dst, Register src) {
8567   InstructionMark im(this);
8568   prefixq(dst, src);
8569   emit_int8((unsigned char)0x89);
8570   emit_operand(src, dst);
8571 }
8572 
8573 void Assembler::movsbq(Register dst, Address src) {
8574   InstructionMark im(this);
8575   prefixq(src, dst);
8576   emit_int8(0x0F);
8577   emit_int8((unsigned char)0xBE);
8578   emit_operand(dst, src);
8579 }
8580 
8581 void Assembler::movsbq(Register dst, Register src) {
8582   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8583   emit_int8(0x0F);
8584   emit_int8((unsigned char)0xBE);
8585   emit_int8((unsigned char)(0xC0 | encode));
8586 }
8587 
8588 void Assembler::movslq(Register dst, int32_t imm32) {
8589   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
8590   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
8591   // as a result we shouldn't use until tested at runtime...
8592   ShouldNotReachHere();
8593   InstructionMark im(this);
8594   int encode = prefixq_and_encode(dst->encoding());
8595   emit_int8((unsigned char)(0xC7 | encode));
8596   emit_int32(imm32);
8597 }
8598 
8599 void Assembler::movslq(Address dst, int32_t imm32) {
8600   assert(is_simm32(imm32), "lost bits");
8601   InstructionMark im(this);
8602   prefixq(dst);
8603   emit_int8((unsigned char)0xC7);
8604   emit_operand(rax, dst, 4);
8605   emit_int32(imm32);
8606 }
8607 
8608 void Assembler::movslq(Register dst, Address src) {
8609   InstructionMark im(this);
8610   prefixq(src, dst);
8611   emit_int8(0x63);
8612   emit_operand(dst, src);
8613 }
8614 
8615 void Assembler::movslq(Register dst, Register src) {
8616   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8617   emit_int8(0x63);
8618   emit_int8((unsigned char)(0xC0 | encode));
8619 }
8620 
8621 void Assembler::movswq(Register dst, Address src) {
8622   InstructionMark im(this);
8623   prefixq(src, dst);
8624   emit_int8(0x0F);
8625   emit_int8((unsigned char)0xBF);
8626   emit_operand(dst, src);
8627 }
8628 
8629 void Assembler::movswq(Register dst, Register src) {
8630   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8631   emit_int8((unsigned char)0x0F);
8632   emit_int8((unsigned char)0xBF);
8633   emit_int8((unsigned char)(0xC0 | encode));
8634 }
8635 
8636 void Assembler::movzbq(Register dst, Address src) {
8637   InstructionMark im(this);
8638   prefixq(src, dst);
8639   emit_int8((unsigned char)0x0F);
8640   emit_int8((unsigned char)0xB6);
8641   emit_operand(dst, src);
8642 }
8643 
8644 void Assembler::movzbq(Register dst, Register src) {
8645   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8646   emit_int8(0x0F);
8647   emit_int8((unsigned char)0xB6);
8648   emit_int8(0xC0 | encode);
8649 }
8650 
8651 void Assembler::movzwq(Register dst, Address src) {
8652   InstructionMark im(this);
8653   prefixq(src, dst);
8654   emit_int8((unsigned char)0x0F);
8655   emit_int8((unsigned char)0xB7);
8656   emit_operand(dst, src);
8657 }
8658 
8659 void Assembler::movzwq(Register dst, Register src) {
8660   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8661   emit_int8((unsigned char)0x0F);
8662   emit_int8((unsigned char)0xB7);
8663   emit_int8((unsigned char)(0xC0 | encode));
8664 }
8665 
8666 void Assembler::mulq(Address src) {
8667   InstructionMark im(this);
8668   prefixq(src);
8669   emit_int8((unsigned char)0xF7);
8670   emit_operand(rsp, src);
8671 }
8672 
8673 void Assembler::mulq(Register src) {
8674   int encode = prefixq_and_encode(src->encoding());
8675   emit_int8((unsigned char)0xF7);
8676   emit_int8((unsigned char)(0xE0 | encode));
8677 }
8678 
8679 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
8680   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8681   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8682   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
8683   emit_int8((unsigned char)0xF6);
8684   emit_int8((unsigned char)(0xC0 | encode));
8685 }
8686 
8687 void Assembler::negq(Register dst) {
8688   int encode = prefixq_and_encode(dst->encoding());
8689   emit_int8((unsigned char)0xF7);
8690   emit_int8((unsigned char)(0xD8 | encode));
8691 }
8692 
8693 void Assembler::notq(Register dst) {
8694   int encode = prefixq_and_encode(dst->encoding());
8695   emit_int8((unsigned char)0xF7);
8696   emit_int8((unsigned char)(0xD0 | encode));
8697 }
8698 
8699 void Assembler::orq(Address dst, int32_t imm32) {
8700   InstructionMark im(this);
8701   prefixq(dst);
8702   emit_int8((unsigned char)0x81);
8703   emit_operand(rcx, dst, 4);
8704   emit_int32(imm32);
8705 }
8706 
8707 void Assembler::orq(Register dst, int32_t imm32) {
8708   (void) prefixq_and_encode(dst->encoding());
8709   emit_arith(0x81, 0xC8, dst, imm32);
8710 }
8711 
8712 void Assembler::orq(Register dst, Address src) {
8713   InstructionMark im(this);
8714   prefixq(src, dst);
8715   emit_int8(0x0B);
8716   emit_operand(dst, src);
8717 }
8718 
8719 void Assembler::orq(Register dst, Register src) {
8720   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8721   emit_arith(0x0B, 0xC0, dst, src);
8722 }
8723 
8724 void Assembler::popa() { // 64bit
8725   movq(r15, Address(rsp, 0));
8726   movq(r14, Address(rsp, wordSize));
8727   movq(r13, Address(rsp, 2 * wordSize));
8728   movq(r12, Address(rsp, 3 * wordSize));
8729   movq(r11, Address(rsp, 4 * wordSize));
8730   movq(r10, Address(rsp, 5 * wordSize));
8731   movq(r9,  Address(rsp, 6 * wordSize));
8732   movq(r8,  Address(rsp, 7 * wordSize));
8733   movq(rdi, Address(rsp, 8 * wordSize));
8734   movq(rsi, Address(rsp, 9 * wordSize));
8735   movq(rbp, Address(rsp, 10 * wordSize));
8736   // skip rsp
8737   movq(rbx, Address(rsp, 12 * wordSize));
8738   movq(rdx, Address(rsp, 13 * wordSize));
8739   movq(rcx, Address(rsp, 14 * wordSize));
8740   movq(rax, Address(rsp, 15 * wordSize));
8741 
8742   addq(rsp, 16 * wordSize);
8743 }
8744 
8745 void Assembler::popcntq(Register dst, Address src) {
8746   assert(VM_Version::supports_popcnt(), "must support");
8747   InstructionMark im(this);
8748   emit_int8((unsigned char)0xF3);
8749   prefixq(src, dst);
8750   emit_int8((unsigned char)0x0F);
8751   emit_int8((unsigned char)0xB8);
8752   emit_operand(dst, src);
8753 }
8754 
8755 void Assembler::popcntq(Register dst, Register src) {
8756   assert(VM_Version::supports_popcnt(), "must support");
8757   emit_int8((unsigned char)0xF3);
8758   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8759   emit_int8((unsigned char)0x0F);
8760   emit_int8((unsigned char)0xB8);
8761   emit_int8((unsigned char)(0xC0 | encode));
8762 }
8763 
8764 void Assembler::popq(Address dst) {
8765   InstructionMark im(this);
8766   prefixq(dst);
8767   emit_int8((unsigned char)0x8F);
8768   emit_operand(rax, dst);
8769 }
8770 
8771 void Assembler::pusha() { // 64bit
8772   // we have to store original rsp.  ABI says that 128 bytes
8773   // below rsp are local scratch.
8774   movq(Address(rsp, -5 * wordSize), rsp);
8775 
8776   subq(rsp, 16 * wordSize);
8777 
8778   movq(Address(rsp, 15 * wordSize), rax);
8779   movq(Address(rsp, 14 * wordSize), rcx);
8780   movq(Address(rsp, 13 * wordSize), rdx);
8781   movq(Address(rsp, 12 * wordSize), rbx);
8782   // skip rsp
8783   movq(Address(rsp, 10 * wordSize), rbp);
8784   movq(Address(rsp, 9 * wordSize), rsi);
8785   movq(Address(rsp, 8 * wordSize), rdi);
8786   movq(Address(rsp, 7 * wordSize), r8);
8787   movq(Address(rsp, 6 * wordSize), r9);
8788   movq(Address(rsp, 5 * wordSize), r10);
8789   movq(Address(rsp, 4 * wordSize), r11);
8790   movq(Address(rsp, 3 * wordSize), r12);
8791   movq(Address(rsp, 2 * wordSize), r13);
8792   movq(Address(rsp, wordSize), r14);
8793   movq(Address(rsp, 0), r15);
8794 }
8795 
8796 void Assembler::pushq(Address src) {
8797   InstructionMark im(this);
8798   prefixq(src);
8799   emit_int8((unsigned char)0xFF);
8800   emit_operand(rsi, src);
8801 }
8802 
8803 void Assembler::rclq(Register dst, int imm8) {
8804   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8805   int encode = prefixq_and_encode(dst->encoding());
8806   if (imm8 == 1) {
8807     emit_int8((unsigned char)0xD1);
8808     emit_int8((unsigned char)(0xD0 | encode));
8809   } else {
8810     emit_int8((unsigned char)0xC1);
8811     emit_int8((unsigned char)(0xD0 | encode));
8812     emit_int8(imm8);
8813   }
8814 }
8815 
8816 void Assembler::rcrq(Register dst, int imm8) {
8817   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8818   int encode = prefixq_and_encode(dst->encoding());
8819   if (imm8 == 1) {
8820     emit_int8((unsigned char)0xD1);
8821     emit_int8((unsigned char)(0xD8 | encode));
8822   } else {
8823     emit_int8((unsigned char)0xC1);
8824     emit_int8((unsigned char)(0xD8 | encode));
8825     emit_int8(imm8);
8826   }
8827 }
8828 
8829 void Assembler::rorq(Register dst, int imm8) {
8830   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8831   int encode = prefixq_and_encode(dst->encoding());
8832   if (imm8 == 1) {
8833     emit_int8((unsigned char)0xD1);
8834     emit_int8((unsigned char)(0xC8 | encode));
8835   } else {
8836     emit_int8((unsigned char)0xC1);
8837     emit_int8((unsigned char)(0xc8 | encode));
8838     emit_int8(imm8);
8839   }
8840 }
8841 
8842 void Assembler::rorxq(Register dst, Register src, int imm8) {
8843   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8844   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8845   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8846   emit_int8((unsigned char)0xF0);
8847   emit_int8((unsigned char)(0xC0 | encode));
8848   emit_int8(imm8);
8849 }
8850 
8851 void Assembler::rorxd(Register dst, Register src, int imm8) {
8852   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8853   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8854   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8855   emit_int8((unsigned char)0xF0);
8856   emit_int8((unsigned char)(0xC0 | encode));
8857   emit_int8(imm8);
8858 }
8859 
8860 void Assembler::sarq(Register dst, int imm8) {
8861   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8862   int encode = prefixq_and_encode(dst->encoding());
8863   if (imm8 == 1) {
8864     emit_int8((unsigned char)0xD1);
8865     emit_int8((unsigned char)(0xF8 | encode));
8866   } else {
8867     emit_int8((unsigned char)0xC1);
8868     emit_int8((unsigned char)(0xF8 | encode));
8869     emit_int8(imm8);
8870   }
8871 }
8872 
8873 void Assembler::sarq(Register dst) {
8874   int encode = prefixq_and_encode(dst->encoding());
8875   emit_int8((unsigned char)0xD3);
8876   emit_int8((unsigned char)(0xF8 | encode));
8877 }
8878 
8879 void Assembler::sbbq(Address dst, int32_t imm32) {
8880   InstructionMark im(this);
8881   prefixq(dst);
8882   emit_arith_operand(0x81, rbx, dst, imm32);
8883 }
8884 
8885 void Assembler::sbbq(Register dst, int32_t imm32) {
8886   (void) prefixq_and_encode(dst->encoding());
8887   emit_arith(0x81, 0xD8, dst, imm32);
8888 }
8889 
8890 void Assembler::sbbq(Register dst, Address src) {
8891   InstructionMark im(this);
8892   prefixq(src, dst);
8893   emit_int8(0x1B);
8894   emit_operand(dst, src);
8895 }
8896 
8897 void Assembler::sbbq(Register dst, Register src) {
8898   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8899   emit_arith(0x1B, 0xC0, dst, src);
8900 }
8901 
8902 void Assembler::shlq(Register dst, int imm8) {
8903   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8904   int encode = prefixq_and_encode(dst->encoding());
8905   if (imm8 == 1) {
8906     emit_int8((unsigned char)0xD1);
8907     emit_int8((unsigned char)(0xE0 | encode));
8908   } else {
8909     emit_int8((unsigned char)0xC1);
8910     emit_int8((unsigned char)(0xE0 | encode));
8911     emit_int8(imm8);
8912   }
8913 }
8914 
8915 void Assembler::shlq(Register dst) {
8916   int encode = prefixq_and_encode(dst->encoding());
8917   emit_int8((unsigned char)0xD3);
8918   emit_int8((unsigned char)(0xE0 | encode));
8919 }
8920 
8921 void Assembler::shrq(Register dst, int imm8) {
8922   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8923   int encode = prefixq_and_encode(dst->encoding());
8924   emit_int8((unsigned char)0xC1);
8925   emit_int8((unsigned char)(0xE8 | encode));
8926   emit_int8(imm8);
8927 }
8928 
8929 void Assembler::shrq(Register dst) {
8930   int encode = prefixq_and_encode(dst->encoding());
8931   emit_int8((unsigned char)0xD3);
8932   emit_int8(0xE8 | encode);
8933 }
8934 
8935 void Assembler::subq(Address dst, int32_t imm32) {
8936   InstructionMark im(this);
8937   prefixq(dst);
8938   emit_arith_operand(0x81, rbp, dst, imm32);
8939 }
8940 
8941 void Assembler::subq(Address dst, Register src) {
8942   InstructionMark im(this);
8943   prefixq(dst, src);
8944   emit_int8(0x29);
8945   emit_operand(src, dst);
8946 }
8947 
8948 void Assembler::subq(Register dst, int32_t imm32) {
8949   (void) prefixq_and_encode(dst->encoding());
8950   emit_arith(0x81, 0xE8, dst, imm32);
8951 }
8952 
8953 // Force generation of a 4 byte immediate value even if it fits into 8bit
8954 void Assembler::subq_imm32(Register dst, int32_t imm32) {
8955   (void) prefixq_and_encode(dst->encoding());
8956   emit_arith_imm32(0x81, 0xE8, dst, imm32);
8957 }
8958 
8959 void Assembler::subq(Register dst, Address src) {
8960   InstructionMark im(this);
8961   prefixq(src, dst);
8962   emit_int8(0x2B);
8963   emit_operand(dst, src);
8964 }
8965 
8966 void Assembler::subq(Register dst, Register src) {
8967   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8968   emit_arith(0x2B, 0xC0, dst, src);
8969 }
8970 
8971 void Assembler::testq(Register dst, int32_t imm32) {
8972   // not using emit_arith because test
8973   // doesn't support sign-extension of
8974   // 8bit operands
8975   int encode = dst->encoding();
8976   if (encode == 0) {
8977     prefix(REX_W);
8978     emit_int8((unsigned char)0xA9);
8979   } else {
8980     encode = prefixq_and_encode(encode);
8981     emit_int8((unsigned char)0xF7);
8982     emit_int8((unsigned char)(0xC0 | encode));
8983   }
8984   emit_int32(imm32);
8985 }
8986 
8987 void Assembler::testq(Register dst, Register src) {
8988   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8989   emit_arith(0x85, 0xC0, dst, src);
8990 }
8991 
8992 void Assembler::xaddq(Address dst, Register src) {
8993   InstructionMark im(this);
8994   prefixq(dst, src);
8995   emit_int8(0x0F);
8996   emit_int8((unsigned char)0xC1);
8997   emit_operand(src, dst);
8998 }
8999 
9000 void Assembler::xchgq(Register dst, Address src) {
9001   InstructionMark im(this);
9002   prefixq(src, dst);
9003   emit_int8((unsigned char)0x87);
9004   emit_operand(dst, src);
9005 }
9006 
9007 void Assembler::xchgq(Register dst, Register src) {
9008   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9009   emit_int8((unsigned char)0x87);
9010   emit_int8((unsigned char)(0xc0 | encode));
9011 }
9012 
9013 void Assembler::xorq(Register dst, Register src) {
9014   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9015   emit_arith(0x33, 0xC0, dst, src);
9016 }
9017 
9018 void Assembler::xorq(Register dst, Address src) {
9019   InstructionMark im(this);
9020   prefixq(src, dst);
9021   emit_int8(0x33);
9022   emit_operand(dst, src);
9023 }
9024 
9025 #endif // !LP64