/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
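
// Worked example (disp8*N compression; a sketch, assuming a full-vector-mem
// instruction such as a 512-bit vmovdqu, which uses EVEX_FVM): the factor at
// AVX_512bit is 64, so a displacement of 128 is stored in the instruction as
// disp8 == 2. See emit_compressed_disp_byte() below for where this table is
// consulted.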

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
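
// Call sites rarely pick a relocType by hand; they usually go through the
// thin AddressLiteral wrappers (ExternalAddress, InternalAddress,
// RuntimeAddress) declared in the assembler header.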

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
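
// For instance, raw (base=rbp, index=4, scale=0, disp=16) takes the
// index-free branch above and decodes as [rbp + 16], since index 4 (rsp)
// is the ModRM/SIB convention for "no index".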

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
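
// encode() strips the high bit of r8-r15; that bit travels in the
// REX/VEX/EVEX prefix (REX.B/X/R), not in the ModRM byte, so only the low
// 3 bits are emitted here.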

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign-extension bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}
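
// Worked example (a sketch): addl(rbx, 16) fits in 8 bits and emits
// 0x83 0xC3 0x10 (opcode with s-bit, ModRM 0xC0|rbx, imm8), while
// addl(rbx, 0x12345678) emits 0x81 0xC3 followed by the little-endian imm32.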

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign-extension bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}
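
// Worked example: an EVEX T1S access with a 64-bit input size (e.g. a scalar
// double load) selects mod_idx 3, so the table factor is 8: a displacement of
// 40 compresses to disp8 == 5, while 41 (not a multiple of 8) forces disp32.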


bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8bit.
  if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
    int evex_encoding = _attributes->get_evex_encoding();
    int tuple_type = _attributes->get_tuple_type();
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_attributes->get_input_size()) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    int vector_len = _attributes->get_vector_len();
    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}
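
// Note the int& parameter: on success the caller's displacement is rewritten
// in place with its scaled-down value, which emit_operand() below then emits
// as a single disp8 byte.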


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
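
// Worked example (a sketch): movl(rax, Address(rcx, rdx, Address::times_4, 8))
// reaches here with disp == 8 and no reloc, takes the
// [base + index*scale + imm8] form, and emits ModRM 0x44, SIB 0x91
// (ss=2, index=rdx, base=rcx), then the disp8 byte 0x08.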

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    if (xreg_enc > 15) {
      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
      return;
    }
  }
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}
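
// For xmm16-xmm31 (AVX-512) only the low 4 encoding bits belong in the
// ModRM/SIB bytes; the upper bits were already emitted as part of the EVEX
// prefix, so masking with 0xf here loses nothing.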

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x58: // addpd
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
    case 0xFE: // paddd
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have prefix 0x0F and are processed when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x22: // pinsrd r, r/a, #8
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}
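
// Worked example: emit_farith(0xD8, 0xC0, 2) encodes the x87 register-stack
// form "fadd st, st(2)"; the stack index lands in the low 3 bits of the
// second opcode byte.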


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}

void Assembler::addw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
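
// The EVEX_T1S address attributes set in the Address forms above are what
// feed the tuple_table lookup in emit_compressed_disp_byte(), enabling
// disp8*N compression of the memory operand's displacement.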

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andnl(Register dst, Register src1, Address src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}
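
// For a bound label the branch is necessarily backward (offs <= 0) and the
// disp32 is known immediately; for an unbound label a patch record is queued
// and a zero placeholder is emitted until the label is bound.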
1496 
1497 void Assembler::call(Register dst) {
1498   int encode = prefix_and_encode(dst->encoding());
1499   emit_int8((unsigned char)0xFF);
1500   emit_int8((unsigned char)(0xD0 | encode));
1501 }
1502 
1503 
1504 void Assembler::call(Address adr) {
1505   InstructionMark im(this);
1506   prefix(adr);
1507   emit_int8((unsigned char)0xFF);
1508   emit_operand(rdx, adr);
1509 }
1510 
1511 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1512   InstructionMark im(this);
1513   emit_int8((unsigned char)0xE8);
1514   intptr_t disp = entry - (pc() + sizeof(int32_t));
1515   // Entry is NULL in case of a scratch emit.
1516   assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
1517   // Technically, should use call32_operand, but this format is
1518   // implied by the fact that we're emitting a call instruction.
1519 
1520   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1521   emit_data((int) disp, rspec, operand);
1522 }
1523 
1524 void Assembler::cdql() {
1525   emit_int8((unsigned char)0x99);
1526 }
1527 
1528 void Assembler::cld() {
1529   emit_int8((unsigned char)0xFC);
1530 }
1531 
1532 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1533   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1534   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1535   emit_int8(0x0F);
1536   emit_int8(0x40 | cc);
1537   emit_int8((unsigned char)(0xC0 | encode));
1538 }
1539 
1540 
1541 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1542   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1543   prefix(src, dst);
1544   emit_int8(0x0F);
1545   emit_int8(0x40 | cc);
1546   emit_operand(dst, src);
1547 }
1548 
1549 void Assembler::cmpb(Address dst, int imm8) {
1550   InstructionMark im(this);
1551   prefix(dst);
1552   emit_int8((unsigned char)0x80);
1553   emit_operand(rdi, dst, 1);
1554   emit_int8(imm8);
1555 }
1556 
1557 void Assembler::cmpl(Address dst, int32_t imm32) {
1558   InstructionMark im(this);
1559   prefix(dst);
1560   emit_int8((unsigned char)0x81);
1561   emit_operand(rdi, dst, 4);
1562   emit_int32(imm32);
1563 }
1564 
1565 void Assembler::cmpl(Register dst, int32_t imm32) {
1566   prefix(dst);
1567   emit_arith(0x81, 0xF8, dst, imm32);
1568 }
1569 
1570 void Assembler::cmpl(Register dst, Register src) {
1571   (void) prefix_and_encode(dst->encoding(), src->encoding());
1572   emit_arith(0x3B, 0xC0, dst, src);
1573 }
1574 
1575 void Assembler::cmpl(Register dst, Address  src) {
1576   InstructionMark im(this);
1577   prefix(src, dst);
1578   emit_int8((unsigned char)0x3B);
1579   emit_operand(dst, src);
1580 }
1581 
1582 void Assembler::cmpw(Address dst, int imm16) {
1583   InstructionMark im(this);
1584   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1585   emit_int8(0x66);
1586   emit_int8((unsigned char)0x81);
1587   emit_operand(rdi, dst, 2);
1588   emit_int16(imm16);
1589 }
1590 
1591 // The 32-bit cmpxchg compares the value at adr with the contents of rax;
1592 // if they are equal, reg is stored into adr, otherwise the value at adr is loaded into rax.
1593 // The ZF is set if the compared values were equal, and cleared otherwise.
1594 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1595   InstructionMark im(this);
1596   prefix(adr, reg);
1597   emit_int8(0x0F);
1598   emit_int8((unsigned char)0xB1);
1599   emit_operand(reg, adr);
1600 }
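     // Usage sketch (illustrative; register and offset names are made up):
     // an atomic compare-and-swap pairs this with the lock prefix emitted
     // immediately before, e.g.
     //   lock();
     //   cmpxchgl(new_value, Address(obj_reg, field_offset));
     // leaving ZF set on success. The byte variant below follows the same
     // pattern.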
1601 
1602 // The 8-bit cmpxchg compares the value at adr with the contents of al (the low byte of rax);
1603 // if they are equal, reg is stored into adr, otherwise the value at adr is loaded into al.
1604 // The ZF is set if the compared values were equal, and cleared otherwise.
1605 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1606   InstructionMark im(this);
1607   prefix(adr, reg, true);
1608   emit_int8(0x0F);
1609   emit_int8((unsigned char)0xB0);
1610   emit_operand(reg, adr);
1611 }
1612 
1613 void Assembler::comisd(XMMRegister dst, Address src) {
1614   // NOTE: dbx seems to decode this as comiss even though the
1615   // 0x66 prefix is there. Strangely, ucomisd comes out correct.
1616   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1617   InstructionMark im(this);
1618   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1619   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1620   attributes.set_rex_vex_w_reverted();
1621   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1622   emit_int8(0x2F);
1623   emit_operand(dst, src);
1624 }
1625 
1626 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1627   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1628   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1629   attributes.set_rex_vex_w_reverted();
1630   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1631   emit_int8(0x2F);
1632   emit_int8((unsigned char)(0xC0 | encode));
1633 }
1634 
1635 void Assembler::comiss(XMMRegister dst, Address src) {
1636   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1637   InstructionMark im(this);
1638   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1639   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1640   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1641   emit_int8(0x2F);
1642   emit_operand(dst, src);
1643 }
1644 
1645 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1646   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1647   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1648   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1649   emit_int8(0x2F);
1650   emit_int8((unsigned char)(0xC0 | encode));
1651 }
1652 
1653 void Assembler::cpuid() {
1654   emit_int8(0x0F);
1655   emit_int8((unsigned char)0xA2);
1656 }
1657 
1658 // Opcode / Instruction                 Op/En  64-Bit Mode  Compat/Leg Mode  Description                 Implemented
1659 // F2 0F 38 F0 /r       CRC32 r32, r/m8   RM   Valid        Valid            Accumulate CRC32 on r/m8.   v
1660 // F2 REX 0F 38 F0 /r   CRC32 r32, r/m8*  RM   Valid        N.E.             Accumulate CRC32 on r/m8.   -
1661 // F2 REX.W 0F 38 F0 /r CRC32 r64, r/m8   RM   Valid        N.E.             Accumulate CRC32 on r/m8.   -
1662 //
1663 // F2 0F 38 F1 /r       CRC32 r32, r/m16  RM   Valid        Valid            Accumulate CRC32 on r/m16.  v
1664 //
1665 // F2 0F 38 F1 /r       CRC32 r32, r/m32  RM   Valid        Valid            Accumulate CRC32 on r/m32.  v
1666 //
1667 // F2 REX.W 0F 38 F1 /r CRC32 r64, r/m64  RM   Valid        N.E.             Accumulate CRC32 on r/m64.  v
1668 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1669   assert(VM_Version::supports_sse4_2(), "");
1670   int8_t w = 0x01;
1671   Prefix p = Prefix_EMPTY;
1672 
1673   emit_int8((int8_t)0xF2);
1674   switch (sizeInBytes) {
1675   case 1:
1676     w = 0;
1677     break;
1678   case 2:
1679   case 4:
1680     break;
1681   LP64_ONLY(case 8:)
1682     // This instruction is not valid in 32-bit mode.
1683     // Note:
1684     // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1685     //
1686     // Page B-72, Vol. 2C says
1687     // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
1688     // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r/m
1689     //                                                                            F0!!!
1690     // while page 3-208, Vol. 2A says
1691     // F2 REX.W 0F 38 F1 /r       CRC32 r64, r/m64             RM         Valid      N.E.  Accumulate CRC32 on r/m64.
1692     //
1693     // The 0 in the last opcode bit is reserved for a different flavor of this instruction:
1694     // F2 REX.W 0F 38 F0 /r       CRC32 r64, r/m8              RM         Valid      N.E.  Accumulate CRC32 on r/m8.
1695     p = REX_W;
1696     break;
1697   default:
1698     assert(0, "Unsupported value for a sizeInBytes argument");
1699     break;
1700   }
1701   LP64_ONLY(prefix(crc, v, p);)
1702   emit_int8((int8_t)0x0F);
1703   emit_int8(0x38);
1704   emit_int8((int8_t)(0xF0 | w));
1705   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1706 }
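     // Worked example (illustrative): on x86_64, crc32(rax, rcx, 4) emits
     // F2 0F 38 F1 C1, i.e. CRC32 eax, ecx. No REX prefix is needed since
     // both register encodings are below 8, and the ModRM byte is
     // 0xC0 | (0 << 3) | 1 == 0xC1.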
1707 
1708 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1709   assert(VM_Version::supports_sse4_2(), "");
1710   InstructionMark im(this);
1711   int8_t w = 0x01;
1712   Prefix p = Prefix_EMPTY;
1713 
1714   emit_int8((int8_t)0xF2);
1715   switch (sizeInBytes) {
1716   case 1:
1717     w = 0;
1718     break;
1719   case 2:
1720   case 4:
1721     break;
1722   LP64_ONLY(case 8:)
1723     // This instruction is not valid in 32-bit mode.
1724     p = REX_W;
1725     break;
1726   default:
1727     assert(0, "Unsupported value for a sizeInBytes argument");
1728     break;
1729   }
1730   LP64_ONLY(prefix(crc, adr, p);)
1731   emit_int8((int8_t)0x0F);
1732   emit_int8(0x38);
1733   emit_int8((int8_t)(0xF0 | w));
1734   emit_operand(crc, adr);
1735 }
1736 
1737 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1738   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1739   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1740   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1741   emit_int8((unsigned char)0xE6);
1742   emit_int8((unsigned char)(0xC0 | encode));
1743 }
1744 
1745 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1746   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1747   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1748   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1749   emit_int8(0x5B);
1750   emit_int8((unsigned char)(0xC0 | encode));
1751 }
1752 
1753 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1754   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1755   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1756   attributes.set_rex_vex_w_reverted();
1757   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1758   emit_int8(0x5A);
1759   emit_int8((unsigned char)(0xC0 | encode));
1760 }
1761 
1762 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1763   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1764   InstructionMark im(this);
1765   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1766   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1767   attributes.set_rex_vex_w_reverted();
1768   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1769   emit_int8(0x5A);
1770   emit_operand(dst, src);
1771 }
1772 
1773 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1774   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1775   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1776   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1777   emit_int8(0x2A);
1778   emit_int8((unsigned char)(0xC0 | encode));
1779 }
1780 
1781 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1782   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1783   InstructionMark im(this);
1784   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1785   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1786   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1787   emit_int8(0x2A);
1788   emit_operand(dst, src);
1789 }
1790 
1791 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1792   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1793   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1794   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1795   emit_int8(0x2A);
1796   emit_int8((unsigned char)(0xC0 | encode));
1797 }
1798 
1799 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1800   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1801   InstructionMark im(this);
1802   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1803   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1804   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1805   emit_int8(0x2A);
1806   emit_operand(dst, src);
1807 }
1808 
1809 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1810   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1811   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1812   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1813   emit_int8(0x2A);
1814   emit_int8((unsigned char)(0xC0 | encode));
1815 }
1816 
1817 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1818   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1819   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1820   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1821   emit_int8(0x5A);
1822   emit_int8((unsigned char)(0xC0 | encode));
1823 }
1824 
1825 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1826   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1827   InstructionMark im(this);
1828   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1829   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1830   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1831   emit_int8(0x5A);
1832   emit_operand(dst, src);
1833 }
1834 
1835 
1836 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1837   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1838   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1839   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1840   emit_int8(0x2C);
1841   emit_int8((unsigned char)(0xC0 | encode));
1842 }
1843 
1844 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1845   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1846   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1847   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1848   emit_int8(0x2C);
1849   emit_int8((unsigned char)(0xC0 | encode));
1850 }
1851 
1852 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1853   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1854   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1855   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1856   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1857   emit_int8((unsigned char)0xE6);
1858   emit_int8((unsigned char)(0xC0 | encode));
1859 }
1860 
1861 void Assembler::decl(Address dst) {
1862   // Don't use it directly. Use MacroAssembler::decrement() instead.
1863   InstructionMark im(this);
1864   prefix(dst);
1865   emit_int8((unsigned char)0xFF);
1866   emit_operand(rcx, dst);
1867 }
1868 
1869 void Assembler::divsd(XMMRegister dst, Address src) {
1870   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1871   InstructionMark im(this);
1872   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1873   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1874   attributes.set_rex_vex_w_reverted();
1875   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1876   emit_int8(0x5E);
1877   emit_operand(dst, src);
1878 }
1879 
1880 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1881   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1882   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1883   attributes.set_rex_vex_w_reverted();
1884   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1885   emit_int8(0x5E);
1886   emit_int8((unsigned char)(0xC0 | encode));
1887 }
1888 
1889 void Assembler::divss(XMMRegister dst, Address src) {
1890   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1891   InstructionMark im(this);
1892   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1893   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1894   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1895   emit_int8(0x5E);
1896   emit_operand(dst, src);
1897 }
1898 
1899 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1900   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1901   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1902   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1903   emit_int8(0x5E);
1904   emit_int8((unsigned char)(0xC0 | encode));
1905 }
1906 
1907 void Assembler::emms() {
1908   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1909   emit_int8(0x0F);
1910   emit_int8(0x77);
1911 }
1912 
1913 void Assembler::hlt() {
1914   emit_int8((unsigned char)0xF4);
1915 }
1916 
1917 void Assembler::idivl(Register src) {
1918   int encode = prefix_and_encode(src->encoding());
1919   emit_int8((unsigned char)0xF7);
1920   emit_int8((unsigned char)(0xF8 | encode));
1921 }
1922 
1923 void Assembler::divl(Register src) { // Unsigned
1924   int encode = prefix_and_encode(src->encoding());
1925   emit_int8((unsigned char)0xF7);
1926   emit_int8((unsigned char)(0xF0 | encode));
1927 }
1928 
1929 void Assembler::imull(Register src) {
1930   int encode = prefix_and_encode(src->encoding());
1931   emit_int8((unsigned char)0xF7);
1932   emit_int8((unsigned char)(0xE8 | encode));
1933 }
1934 
1935 void Assembler::imull(Register dst, Register src) {
1936   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1937   emit_int8(0x0F);
1938   emit_int8((unsigned char)0xAF);
1939   emit_int8((unsigned char)(0xC0 | encode));
1940 }
1941 
1942 
1943 void Assembler::imull(Register dst, Register src, int value) {
1944   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1945   if (is8bit(value)) {
1946     emit_int8(0x6B);
1947     emit_int8((unsigned char)(0xC0 | encode));
1948     emit_int8(value & 0xFF);
1949   } else {
1950     emit_int8(0x69);
1951     emit_int8((unsigned char)(0xC0 | encode));
1952     emit_int32(value);
1953   }
1954 }
1955 
1956 void Assembler::imull(Register dst, Address src) {
1957   InstructionMark im(this);
1958   prefix(src, dst);
1959   emit_int8(0x0F);
1960   emit_int8((unsigned char) 0xAF);
1961   emit_operand(dst, src);
1962 }
1963 
1964 
1965 void Assembler::incl(Address dst) {
1966   // Don't use it directly. Use MacroAssembler::increment() instead.
1967   InstructionMark im(this);
1968   prefix(dst);
1969   emit_int8((unsigned char)0xFF);
1970   emit_operand(rax, dst);
1971 }
1972 
1973 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1974   InstructionMark im(this);
1975   assert((0 <= cc) && (cc < 16), "illegal cc");
1976   if (L.is_bound()) {
1977     address dst = target(L);
1978     assert(dst != NULL, "jcc most probably wrong");
1979 
1980     const int short_size = 2;
1981     const int long_size = 6;
1982     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1983     if (maybe_short && is8bit(offs - short_size)) {
1984       // 0111 tttn #8-bit disp
1985       emit_int8(0x70 | cc);
1986       emit_int8((offs - short_size) & 0xFF);
1987     } else {
1988       // 0000 1111 1000 tttn #32-bit disp
1989       assert(is_simm32(offs - long_size),
1990              "must be 32bit offset (call4)");
1991       emit_int8(0x0F);
1992       emit_int8((unsigned char)(0x80 | cc));
1993       emit_int32(offs - long_size);
1994     }
1995   } else {
1996     // Note: conditional jumps to this jump could be eliminated when the
1997     //       condition is the same; however, that seems to be a rather unlikely case.
1998     // Note: use jccb() if the label to be bound is very close, to get
1999     //       an 8-bit displacement.
2000     L.add_patch_at(code(), locator());
2001     emit_int8(0x0F);
2002     emit_int8((unsigned char)(0x80 | cc));
2003     emit_int32(0);
2004   }
2005 }
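     // Worked example (illustrative): a bound je (cc == 0x4) to a label 16
     // bytes before the opcode fits the short form and emits 74 EE -- the
     // disp8 is offs - 2 == -18, measured from the end of the 2-byte
     // instruction. Targets out of disp8 range get the 6-byte 0F 8x rel32 form.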
2006 
2007 void Assembler::jccb(Condition cc, Label& L) {
2008   if (L.is_bound()) {
2009     const int short_size = 2;
2010     address entry = target(L);
2011 #ifdef ASSERT
2012     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2013     intptr_t delta = short_branch_delta();
2014     if (delta != 0) {
2015       dist += (dist < 0 ? -delta : delta);
2016     }
2017     assert(is8bit(dist), "Displacement too large for a short jmp");
2018 #endif
2019     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2020     // 0111 tttn #8-bit disp
2021     emit_int8(0x70 | cc);
2022     emit_int8((offs - short_size) & 0xFF);
2023   } else {
2024     InstructionMark im(this);
2025     L.add_patch_at(code(), locator());
2026     emit_int8(0x70 | cc);
2027     emit_int8(0);
2028   }
2029 }
2030 
2031 void Assembler::jmp(Address adr) {
2032   InstructionMark im(this);
2033   prefix(adr);
2034   emit_int8((unsigned char)0xFF);
2035   emit_operand(rsp, adr);
2036 }
2037 
2038 void Assembler::jmp(Label& L, bool maybe_short) {
2039   if (L.is_bound()) {
2040     address entry = target(L);
2041     assert(entry != NULL, "jmp most probably wrong");
2042     InstructionMark im(this);
2043     const int short_size = 2;
2044     const int long_size = 5;
2045     intptr_t offs = entry - pc();
2046     if (maybe_short && is8bit(offs - short_size)) {
2047       emit_int8((unsigned char)0xEB);
2048       emit_int8((offs - short_size) & 0xFF);
2049     } else {
2050       emit_int8((unsigned char)0xE9);
2051       emit_int32(offs - long_size);
2052     }
2053   } else {
2054     // By default, forward jumps are always 32-bit displacements, since
2055     // we can't yet know where the label will be bound.  If you're sure that
2056     // the forward jump will stay within the range of an 8-bit displacement
2057     // (-128..127 bytes), use jmpb to force the short form.
2058     InstructionMark im(this);
2059     L.add_patch_at(code(), locator());
2060     emit_int8((unsigned char)0xE9);
2061     emit_int32(0);
2062   }
2063 }
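     // Worked example (illustrative): a bound jmp to a label 3 bytes before
     // the opcode emits EB FB (disp8 == -5, measured from the end of the
     // 2-byte instruction); otherwise the 5-byte near form E9 rel32 is used.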
2064 
2065 void Assembler::jmp(Register entry) {
2066   int encode = prefix_and_encode(entry->encoding());
2067   emit_int8((unsigned char)0xFF);
2068   emit_int8((unsigned char)(0xE0 | encode));
2069 }
2070 
2071 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2072   InstructionMark im(this);
2073   emit_int8((unsigned char)0xE9);
2074   assert(dest != NULL, "must have a target");
2075   intptr_t disp = dest - (pc() + sizeof(int32_t));
2076   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2077   emit_data(disp, rspec.reloc(), call32_operand);
2078 }
2079 
2080 void Assembler::jmpb(Label& L) {
2081   if (L.is_bound()) {
2082     const int short_size = 2;
2083     address entry = target(L);
2084     assert(entry != NULL, "jmp most probably wrong");
2085 #ifdef ASSERT
2086     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2087     intptr_t delta = short_branch_delta();
2088     if (delta != 0) {
2089       dist += (dist < 0 ? -delta : delta);
2090     }
2091     assert(is8bit(dist), "Displacement too large for a short jmp");
2092 #endif
2093     intptr_t offs = entry - pc();
2094     emit_int8((unsigned char)0xEB);
2095     emit_int8((offs - short_size) & 0xFF);
2096   } else {
2097     InstructionMark im(this);
2098     L.add_patch_at(code(), locator());
2099     emit_int8((unsigned char)0xEB);
2100     emit_int8(0);
2101   }
2102 }
2103 
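     // LDMXCSR loads the MXCSR control/status register from memory; the
     // encoding is 0F AE /2 (VEX.0F AE /2 when AVX is in use), so the
     // as_Register(2) below merely supplies the /2 opcode extension in the
     // ModRM reg field.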
2104 void Assembler::ldmxcsr(Address src) {
2105   if (UseAVX > 0) {
2106     InstructionMark im(this);
2107     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2108     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2109     emit_int8((unsigned char)0xAE);
2110     emit_operand(as_Register(2), src);
2111   } else {
2112     NOT_LP64(assert(VM_Version::supports_sse(), ""));
2113     InstructionMark im(this);
2114     prefix(src);
2115     emit_int8(0x0F);
2116     emit_int8((unsigned char)0xAE);
2117     emit_operand(as_Register(2), src);
2118   }
2119 }
2120 
2121 void Assembler::leal(Register dst, Address src) {
2122   InstructionMark im(this);
2123 #ifdef _LP64
2124   emit_int8(0x67); // addr32
2125   prefix(src, dst);
2126 #endif // LP64
2127   emit_int8((unsigned char)0x8D);
2128   emit_operand(dst, src);
2129 }
2130 
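     // Emit lfence instruction (load fence: 0F AE E8, i.e. 0F AE /5)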
2131 void Assembler::lfence() {
2132   emit_int8(0x0F);
2133   emit_int8((unsigned char)0xAE);
2134   emit_int8((unsigned char)0xE8);
2135 }
2136 
2137 void Assembler::lock() {
2138   emit_int8((unsigned char)0xF0);
2139 }
2140 
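     // lzcnt is encoded as an F3-prefixed bsr. On CPUs without LZCNT support
     // the F3 prefix is ignored and the bytes execute as plain bsr (which
     // computes a different result), hence the assert below.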
2141 void Assembler::lzcntl(Register dst, Register src) {
2142   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2143   emit_int8((unsigned char)0xF3);
2144   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2145   emit_int8(0x0F);
2146   emit_int8((unsigned char)0xBD);
2147   emit_int8((unsigned char)(0xC0 | encode));
2148 }
2149 
2150 // Emit mfence instruction (full memory fence: 0F AE F0)
2151 void Assembler::mfence() {
2152   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2153   emit_int8(0x0F);
2154   emit_int8((unsigned char)0xAE);
2155   emit_int8((unsigned char)0xF0);
2156 }
2157 
2158 void Assembler::mov(Register dst, Register src) {
2159   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2160 }
2161 
2162 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2163   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2164   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2165   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2166   attributes.set_rex_vex_w_reverted();
2167   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2168   emit_int8(0x28);
2169   emit_int8((unsigned char)(0xC0 | encode));
2170 }
2171 
2172 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2173   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2174   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2175   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2176   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2177   emit_int8(0x28);
2178   emit_int8((unsigned char)(0xC0 | encode));
2179 }
2180 
2181 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2182   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2183   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2184   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2185   emit_int8(0x16);
2186   emit_int8((unsigned char)(0xC0 | encode));
2187 }
2188 
2189 void Assembler::movb(Register dst, Address src) {
2190   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2191   InstructionMark im(this);
2192   prefix(src, dst, true);
2193   emit_int8((unsigned char)0x8A);
2194   emit_operand(dst, src);
2195 }
2196 
2197 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2198   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2199   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2200   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2201   attributes.set_rex_vex_w_reverted();
2202   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2203   emit_int8(0x12);
2204   emit_int8(0xC0 | encode);
2205 }
2206 
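     // kmov* opcode scheme (Intel SDM): 0x90 moves mask<-mask/mem, 0x91 stores
     // mask->mem, 0x92 moves GPR->mask, and 0x93 moves mask->GPR; the SIMD
     // prefix and VEX.W encode the operand width (b/w/d/q).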
2207 void Assembler::kmovbl(KRegister dst, Register src) {
2208   assert(VM_Version::supports_avx512dq(), "");
2209   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2210   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2211   emit_int8((unsigned char)0x92);
2212   emit_int8((unsigned char)(0xC0 | encode));
2213 }
2214 
2215 void Assembler::kmovbl(Register dst, KRegister src) {
2216   assert(VM_Version::supports_avx512dq(), "");
2217   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2218   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2219   emit_int8((unsigned char)0x93);
2220   emit_int8((unsigned char)(0xC0 | encode));
2221 }
2222 
2223 void Assembler::kmovwl(KRegister dst, Register src) {
2224   assert(VM_Version::supports_evex(), "");
2225   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2226   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2227   emit_int8((unsigned char)0x92);
2228   emit_int8((unsigned char)(0xC0 | encode));
2229 }
2230 
2231 void Assembler::kmovwl(Register dst, KRegister src) {
2232   assert(VM_Version::supports_evex(), "");
2233   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2234   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2235   emit_int8((unsigned char)0x93);
2236   emit_int8((unsigned char)(0xC0 | encode));
2237 }
2238 
2239 void Assembler::kmovwl(KRegister dst, Address src) {
2240   assert(VM_Version::supports_evex(), "");
2241   InstructionMark im(this);
2242   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2243   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2244   emit_int8((unsigned char)0x90);
2245   emit_operand((Register)dst, src);
2246 }
2247 
2248 void Assembler::kmovdl(KRegister dst, Register src) {
2249   assert(VM_Version::supports_avx512bw(), "");
2250   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2251   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2252   emit_int8((unsigned char)0x92);
2253   emit_int8((unsigned char)(0xC0 | encode));
2254 }
2255 
2256 void Assembler::kmovdl(Register dst, KRegister src) {
2257   assert(VM_Version::supports_avx512bw(), "");
2258   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2259   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2260   emit_int8((unsigned char)0x93);
2261   emit_int8((unsigned char)(0xC0 | encode));
2262 }
2263 
2264 void Assembler::kmovql(KRegister dst, KRegister src) {
2265   assert(VM_Version::supports_avx512bw(), "");
2266   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2267   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2268   emit_int8((unsigned char)0x90);
2269   emit_int8((unsigned char)(0xC0 | encode));
2270 }
2271 
2272 void Assembler::kmovql(KRegister dst, Address src) {
2273   assert(VM_Version::supports_avx512bw(), "");
2274   InstructionMark im(this);
2275   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2276   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2277   emit_int8((unsigned char)0x90);
2278   emit_operand((Register)dst, src);
2279 }
2280 
2281 void Assembler::kmovql(Address dst, KRegister src) {
2282   assert(VM_Version::supports_avx512bw(), "");
2283   InstructionMark im(this);
2284   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2285   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2286   emit_int8((unsigned char)0x91); // 0x91 is the store form (kmov m64, k); 0x90 would encode a load
2287   emit_operand((Register)src, dst);
2288 }
2289 
2290 void Assembler::kmovql(KRegister dst, Register src) {
2291   assert(VM_Version::supports_avx512bw(), "");
2292   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2293   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2294   emit_int8((unsigned char)0x92);
2295   emit_int8((unsigned char)(0xC0 | encode));
2296 }
2297 
2298 void Assembler::kmovql(Register dst, KRegister src) {
2299   assert(VM_Version::supports_avx512bw(), "");
2300   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2301   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2302   emit_int8((unsigned char)0x93);
2303   emit_int8((unsigned char)(0xC0 | encode));
2304 }
2305 
2306 void Assembler::knotwl(KRegister dst, KRegister src) {
2307   assert(VM_Version::supports_evex(), "");
2308   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2309   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2310   emit_int8((unsigned char)0x44);
2311   emit_int8((unsigned char)(0xC0 | encode));
2312 }
2313 
2314 // Sets ZF if the OR of the two mask operands is all zeroes, and CF if it is all ones.
2315 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2316   assert(VM_Version::supports_avx512dq(), "");
2317   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2318   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2319   emit_int8((unsigned char)0x98);
2320   emit_int8((unsigned char)(0xC0 | encode));
2321 }
2322 
2323 // Sets ZF if the OR of the two mask operands is all zeroes, and CF if it is all ones.
2324 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2325   assert(VM_Version::supports_evex(), "");
2326   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2327   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2328   emit_int8((unsigned char)0x98);
2329   emit_int8((unsigned char)(0xC0 | encode));
2330 }
2331 
2332 // Sets ZF if the OR of the two mask operands is all zeroes, and CF if it is all ones.
2333 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2334   assert(VM_Version::supports_avx512bw(), "");
2335   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2336   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2337   emit_int8((unsigned char)0x98);
2338   emit_int8((unsigned char)(0xC0 | encode));
2339 }
2340 
2341 // Sets ZF if the OR of the two mask operands is all zeroes, and CF if it is all ones.
2342 void Assembler::kortestql(KRegister src1, KRegister src2) {
2343   assert(VM_Version::supports_avx512bw(), "");
2344   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2345   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2346   emit_int8((unsigned char)0x98);
2347   emit_int8((unsigned char)(0xC0 | encode));
2348 }
2349 
2350 // Sets ZF if (src1 AND src2) is all zeroes, and CF if ((NOT src1) AND src2) is all zeroes.
2351 void Assembler::ktestql(KRegister src1, KRegister src2) {
2352   assert(VM_Version::supports_avx512bw(), "");
2353   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2354   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2355   emit_int8((unsigned char)0x99);
2356   emit_int8((unsigned char)(0xC0 | encode));
2357 }
2358 
2359 void Assembler::ktestq(KRegister src1, KRegister src2) {
2360   assert(VM_Version::supports_avx512bw(), "");
2361   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2362   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2363   emit_int8((unsigned char)0x99);
2364   emit_int8((unsigned char)(0xC0 | encode));
2365 }
2366 
2367 void Assembler::ktestd(KRegister src1, KRegister src2) {
2368   assert(VM_Version::supports_avx512bw(), "");
2369   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2370   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2371   emit_int8((unsigned char)0x99);
2372   emit_int8((unsigned char)(0xC0 | encode));
2373 }
2374 
2375 void Assembler::movb(Address dst, int imm8) {
2376   InstructionMark im(this);
2377   prefix(dst);
2378   emit_int8((unsigned char)0xC6);
2379   emit_operand(rax, dst, 1);
2380   emit_int8(imm8);
2381 }
2382 
2383 
2384 void Assembler::movb(Address dst, Register src) {
2385   assert(src->has_byte_register(), "must have byte register");
2386   InstructionMark im(this);
2387   prefix(dst, src, true);
2388   emit_int8((unsigned char)0x88);
2389   emit_operand(src, dst);
2390 }
2391 
2392 void Assembler::movdl(XMMRegister dst, Register src) {
2393   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2394   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2395   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2396   emit_int8(0x6E);
2397   emit_int8((unsigned char)(0xC0 | encode));
2398 }
2399 
2400 void Assembler::movdl(Register dst, XMMRegister src) {
2401   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2402   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2403   // swap src/dst to get correct prefix
2404   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2405   emit_int8(0x7E);
2406   emit_int8((unsigned char)(0xC0 | encode));
2407 }
2408 
2409 void Assembler::movdl(XMMRegister dst, Address src) {
2410   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2411   InstructionMark im(this);
2412   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2413   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2414   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2415   emit_int8(0x6E);
2416   emit_operand(dst, src);
2417 }
2418 
2419 void Assembler::movdl(Address dst, XMMRegister src) {
2420   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2421   InstructionMark im(this);
2422   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2423   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2424   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2425   emit_int8(0x7E);
2426   emit_operand(src, dst);
2427 }
2428 
2429 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2430   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2431   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2432   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2433   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2434   emit_int8(0x6F);
2435   emit_int8((unsigned char)(0xC0 | encode));
2436 }
2437 
2438 void Assembler::movdqa(XMMRegister dst, Address src) {
2439   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2440   InstructionMark im(this);
2441   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2442   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2443   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2444   emit_int8(0x6F);
2445   emit_operand(dst, src);
2446 }
2447 
2448 void Assembler::movdqu(XMMRegister dst, Address src) {
2449   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2450   InstructionMark im(this);
2451   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2452   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2453   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2454   emit_int8(0x6F);
2455   emit_operand(dst, src);
2456 }
2457 
2458 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2459   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2460   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2461   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2462   emit_int8(0x6F);
2463   emit_int8((unsigned char)(0xC0 | encode));
2464 }
2465 
2466 void Assembler::movdqu(Address dst, XMMRegister src) {
2467   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2468   InstructionMark im(this);
2469   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2470   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2471   attributes.reset_is_clear_context();
2472   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2473   emit_int8(0x7F);
2474   emit_operand(src, dst);
2475 }
2476 
2477 // Move Unaligned 256-bit Vector
2478 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2479   assert(UseAVX > 0, "");
2480   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2481   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2482   emit_int8(0x6F);
2483   emit_int8((unsigned char)(0xC0 | encode));
2484 }
2485 
2486 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2487   assert(UseAVX > 0, "");
2488   InstructionMark im(this);
2489   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2490   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2491   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2492   emit_int8(0x6F);
2493   emit_operand(dst, src);
2494 }
2495 
2496 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2497   assert(UseAVX > 0, "");
2498   InstructionMark im(this);
2499   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2500   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2501   attributes.reset_is_clear_context();
2502   // swap src<->dst for encoding
2503   assert(src != xnoreg, "sanity");
2504   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2505   emit_int8(0x7F);
2506   emit_operand(src, dst);
2507 }
2508 
2509 // Move Unaligned, EVEX-enabled Vector (element size programmable: 8, 16, 32, or 64 bits)
2510 void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
2511   assert(VM_Version::supports_evex(), "");
2512   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2513   attributes.set_is_evex_instruction();
2514   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2515   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2516   emit_int8(0x6F);
2517   emit_int8((unsigned char)(0xC0 | encode));
2518 }
2519 
2520 void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
2521   assert(VM_Version::supports_evex(), "");
2522   InstructionMark im(this);
2523   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2524   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2525   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2526   attributes.set_is_evex_instruction();
2527   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2528   emit_int8(0x6F);
2529   emit_operand(dst, src);
2530 }
2531 
2532 void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
2533   assert(VM_Version::supports_evex(), "");
2534   assert(src != xnoreg, "sanity");
2535   InstructionMark im(this);
2536   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2537   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2538   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2539   attributes.set_is_evex_instruction();
2540   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2541   emit_int8(0x7F);
2542   emit_operand(src, dst);
2543 }
2544 
2545 void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2546   assert(VM_Version::supports_avx512vlbw(), "");
2547   assert(is_vector_masking(), "");    // For stub code use only
2548   InstructionMark im(this);
2549   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2550   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2551   attributes.set_embedded_opmask_register_specifier(mask);
2552   attributes.set_is_evex_instruction();
2553   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2554   emit_int8(0x6F);
2555   emit_operand(dst, src);
2556 }
2557 
2558 void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
2559   assert(VM_Version::supports_evex(), "");
2560   InstructionMark im(this);
2561   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2562   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2563   attributes.set_is_evex_instruction();
2564   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2565   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2566   emit_int8(0x6F);
2567   emit_operand(dst, src);
2568 }
2569 
2570 void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2571   assert(is_vector_masking(), "");
2572   assert(VM_Version::supports_avx512vlbw(), "");
2573   InstructionMark im(this);
2574   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2575   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2576   attributes.set_embedded_opmask_register_specifier(mask);
2577   attributes.set_is_evex_instruction();
2578   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2579   emit_int8(0x6F);
2580   emit_operand(dst, src);
2581 }
2582 
2583 void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
2584   assert(VM_Version::supports_evex(), "");
2585   assert(src != xnoreg, "sanity");
2586   InstructionMark im(this);
2587   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2588   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2589   attributes.set_is_evex_instruction();
2590   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2591   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2592   emit_int8(0x7F);
2593   emit_operand(src, dst);
2594 }
2595 
2596 void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
2597   assert(VM_Version::supports_avx512vlbw(), "");
2598   assert(src != xnoreg, "sanity");
2599   InstructionMark im(this);
2600   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2601   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2602   attributes.reset_is_clear_context();
2603   attributes.set_embedded_opmask_register_specifier(mask);
2604   attributes.set_is_evex_instruction();
2605   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2606   emit_int8(0x7F);
2607   emit_operand(src, dst);
2608 }
2609 
2610 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2611   assert(VM_Version::supports_evex(), "");
2612   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2613   attributes.set_is_evex_instruction();
2614   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2615   emit_int8(0x6F);
2616   emit_int8((unsigned char)(0xC0 | encode));
2617 }
2618 
2619 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2620   assert(VM_Version::supports_evex(), "");
2621   InstructionMark im(this);
2622   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
2623   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2624   attributes.set_is_evex_instruction();
2625   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2626   emit_int8(0x6F);
2627   emit_operand(dst, src);
2628 }
2629 
2630 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2631   assert(VM_Version::supports_evex(), "");
2632   assert(src != xnoreg, "sanity");
2633   InstructionMark im(this);
2634   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2635   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2636   attributes.reset_is_clear_context();
2637   attributes.set_is_evex_instruction();
2638   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2639   emit_int8(0x7F);
2640   emit_operand(src, dst);
2641 }
2642 
2643 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2644   assert(VM_Version::supports_evex(), "");
2645   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2646   attributes.set_is_evex_instruction();
2647   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2648   emit_int8(0x6F);
2649   emit_int8((unsigned char)(0xC0 | encode));
2650 }
2651 
2652 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2653   assert(VM_Version::supports_evex(), "");
2654   InstructionMark im(this);
2655   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2656   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2657   attributes.set_is_evex_instruction();
2658   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2659   emit_int8(0x6F);
2660   emit_operand(dst, src);
2661 }
2662 
2663 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2664   assert(VM_Version::supports_evex(), "");
2665   assert(src != xnoreg, "sanity");
2666   InstructionMark im(this);
2667   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2668   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2669   attributes.reset_is_clear_context();
2670   attributes.set_is_evex_instruction();
2671   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2672   emit_int8(0x7F);
2673   emit_operand(src, dst);
2674 }
2675 
2676 // Uses zero extension on 64-bit: writing a 32-bit register clears bits 63:32
2677 // (e.g., movl(rax, -1) leaves rax == 0x00000000FFFFFFFF).
2678 void Assembler::movl(Register dst, int32_t imm32) {
2679   int encode = prefix_and_encode(dst->encoding());
2680   emit_int8((unsigned char)(0xB8 | encode));
2681   emit_int32(imm32);
2682 }
2683 
2684 void Assembler::movl(Register dst, Register src) {
2685   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2686   emit_int8((unsigned char)0x8B);
2687   emit_int8((unsigned char)(0xC0 | encode));
2688 }
2689 
2690 void Assembler::movl(Register dst, Address src) {
2691   InstructionMark im(this);
2692   prefix(src, dst);
2693   emit_int8((unsigned char)0x8B);
2694   emit_operand(dst, src);
2695 }
2696 
2697 void Assembler::movl(Address dst, int32_t imm32) {
2698   InstructionMark im(this);
2699   prefix(dst);
2700   emit_int8((unsigned char)0xC7);
2701   emit_operand(rax, dst, 4);
2702   emit_int32(imm32);
2703 }
2704 
2705 void Assembler::movl(Address dst, Register src) {
2706   InstructionMark im(this);
2707   prefix(dst, src);
2708   emit_int8((unsigned char)0x89);
2709   emit_operand(src, dst);
2710 }
2711 
2712 // Newer CPUs require movsd and movss to be used when loading from memory, to
2713 // avoid a partial register stall. But for the old Opteron, use movlpd instead of movsd.
2714 // The selection is done in MacroAssembler::movdbl() and movflt().
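     // (The load forms of movsd/movss zero the upper bits of the destination
     // XMM register, breaking the dependency on its previous contents, while
     // movlpd merges into the low half and leaves the upper half live.)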
2715 void Assembler::movlpd(XMMRegister dst, Address src) {
2716   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2717   InstructionMark im(this);
2718   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2719   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2720   attributes.set_rex_vex_w_reverted();
2721   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2722   emit_int8(0x12);
2723   emit_operand(dst, src);
2724 }
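
// A minimal sketch of the selection described in the comment above, assuming
// a UseXmmLoadAndClearUpper-style flag; the real dispatch lives in
// MacroAssembler::movdbl()/movflt(), so this block is illustrative only.
#if 0
void MacroAssembler::movdbl(XMMRegister dst, Address src) {
  if (UseXmmLoadAndClearUpper) {
    movsd(dst, src);   // newer CPUs: full-width load, no partial register stall
  } else {
    movlpd(dst, src);  // old Opteron: the movlpd load avoids the movsd penalty
  }
}
#endif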
2725 
2726 void Assembler::movq( MMXRegister dst, Address src ) {
2727   assert( VM_Version::supports_mmx(), "" );
2728   emit_int8(0x0F);
2729   emit_int8(0x6F);
2730   emit_operand(dst, src);
2731 }
2732 
2733 void Assembler::movq( Address dst, MMXRegister src ) {
2734   assert( VM_Version::supports_mmx(), "" );
2735   emit_int8(0x0F);
2736   emit_int8(0x7F);
2737   // Workaround for a gcc (3.2.1-7a) bug:
2738   // in that version of gcc, with only an emit_operand(MMX, Address)
2739   // available, gcc will tail-jump and try to reverse the parameters,
2740   // completely obliterating dst in the process. By having a version
2741   // available that doesn't need to swap the args at the tail jump,
2742   // the bug is avoided.
2743   emit_operand(dst, src);
2744 }
2745 
2746 void Assembler::movq(XMMRegister dst, Address src) {
2747   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2748   InstructionMark im(this);
2749   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2750   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2751   attributes.set_rex_vex_w_reverted();
2752   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2753   emit_int8(0x7E);
2754   emit_operand(dst, src);
2755 }
2756 
2757 void Assembler::movq(Address dst, XMMRegister src) {
2758   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2759   InstructionMark im(this);
2760   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2761   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2762   attributes.set_rex_vex_w_reverted();
2763   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2764   emit_int8((unsigned char)0xD6);
2765   emit_operand(src, dst);
2766 }
2767 
2768 void Assembler::movsbl(Register dst, Address src) { // movsxb
2769   InstructionMark im(this);
2770   prefix(src, dst);
2771   emit_int8(0x0F);
2772   emit_int8((unsigned char)0xBE);
2773   emit_operand(dst, src);
2774 }
2775 
2776 void Assembler::movsbl(Register dst, Register src) { // movsxb
2777   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2778   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2779   emit_int8(0x0F);
2780   emit_int8((unsigned char)0xBE);
2781   emit_int8((unsigned char)(0xC0 | encode));
2782 }
2783 
2784 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2785   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2786   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2787   attributes.set_rex_vex_w_reverted();
2788   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2789   emit_int8(0x10);
2790   emit_int8((unsigned char)(0xC0 | encode));
2791 }
2792 
2793 void Assembler::movsd(XMMRegister dst, Address src) {
2794   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2795   InstructionMark im(this);
2796   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2797   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2798   attributes.set_rex_vex_w_reverted();
2799   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2800   emit_int8(0x10);
2801   emit_operand(dst, src);
2802 }
2803 
2804 void Assembler::movsd(Address dst, XMMRegister src) {
2805   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2806   InstructionMark im(this);
2807   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2808   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2809   attributes.reset_is_clear_context();
2810   attributes.set_rex_vex_w_reverted();
2811   simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2812   emit_int8(0x11);
2813   emit_operand(src, dst);
2814 }
2815 
2816 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2817   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2818   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2819   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2820   emit_int8(0x10);
2821   emit_int8((unsigned char)(0xC0 | encode));
2822 }
2823 
2824 void Assembler::movss(XMMRegister dst, Address src) {
2825   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2826   InstructionMark im(this);
2827   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2828   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2829   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2830   emit_int8(0x10);
2831   emit_operand(dst, src);
2832 }
2833 
2834 void Assembler::movss(Address dst, XMMRegister src) {
2835   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2836   InstructionMark im(this);
2837   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2838   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2839   attributes.reset_is_clear_context();
2840   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2841   emit_int8(0x11);
2842   emit_operand(src, dst);
2843 }
2844 
2845 void Assembler::movswl(Register dst, Address src) { // movsxw
2846   InstructionMark im(this);
2847   prefix(src, dst);
2848   emit_int8(0x0F);
2849   emit_int8((unsigned char)0xBF);
2850   emit_operand(dst, src);
2851 }
2852 
2853 void Assembler::movswl(Register dst, Register src) { // movsxw
2854   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2855   emit_int8(0x0F);
2856   emit_int8((unsigned char)0xBF);
2857   emit_int8((unsigned char)(0xC0 | encode));
2858 }
2859 
2860 void Assembler::movw(Address dst, int imm16) {
2861   InstructionMark im(this);
2862 
2863   emit_int8(0x66); // switch to 16-bit mode
2864   prefix(dst);
2865   emit_int8((unsigned char)0xC7);
2866   emit_operand(rax, dst, 2);
2867   emit_int16(imm16);
2868 }
2869 
2870 void Assembler::movw(Register dst, Address src) {
2871   InstructionMark im(this);
2872   emit_int8(0x66);
2873   prefix(src, dst);
2874   emit_int8((unsigned char)0x8B);
2875   emit_operand(dst, src);
2876 }
2877 
2878 void Assembler::movw(Address dst, Register src) {
2879   InstructionMark im(this);
2880   emit_int8(0x66);
2881   prefix(dst, src);
2882   emit_int8((unsigned char)0x89);
2883   emit_operand(src, dst);
2884 }
2885 
2886 void Assembler::movzbl(Register dst, Address src) { // movzxb
2887   InstructionMark im(this);
2888   prefix(src, dst);
2889   emit_int8(0x0F);
2890   emit_int8((unsigned char)0xB6);
2891   emit_operand(dst, src);
2892 }
2893 
2894 void Assembler::movzbl(Register dst, Register src) { // movzxb
2895   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2896   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2897   emit_int8(0x0F);
2898   emit_int8((unsigned char)0xB6);
2899   emit_int8((unsigned char)(0xC0 | encode));
2900 }
2901 
2902 void Assembler::movzwl(Register dst, Address src) { // movzxw
2903   InstructionMark im(this);
2904   prefix(src, dst);
2905   emit_int8(0x0F);
2906   emit_int8((unsigned char)0xB7);
2907   emit_operand(dst, src);
2908 }
2909 
2910 void Assembler::movzwl(Register dst, Register src) { // movzxw
2911   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2912   emit_int8(0x0F);
2913   emit_int8((unsigned char)0xB7);
2914   emit_int8((unsigned char)(0xC0 | encode));
2915 }
2916 
2917 void Assembler::mull(Address src) {
2918   InstructionMark im(this);
2919   prefix(src);
2920   emit_int8((unsigned char)0xF7);
2921   emit_operand(rsp, src);
2922 }
2923 
2924 void Assembler::mull(Register src) {
2925   int encode = prefix_and_encode(src->encoding());
2926   emit_int8((unsigned char)0xF7);
2927   emit_int8((unsigned char)(0xE0 | encode));
2928 }
2929 
2930 void Assembler::mulsd(XMMRegister dst, Address src) {
2931   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2932   InstructionMark im(this);
2933   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2934   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2935   attributes.set_rex_vex_w_reverted();
2936   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2937   emit_int8(0x59);
2938   emit_operand(dst, src);
2939 }
2940 
2941 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2942   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2943   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2944   attributes.set_rex_vex_w_reverted();
2945   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2946   emit_int8(0x59);
2947   emit_int8((unsigned char)(0xC0 | encode));
2948 }
2949 
2950 void Assembler::mulss(XMMRegister dst, Address src) {
2951   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2952   InstructionMark im(this);
2953   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2954   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2955   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2956   emit_int8(0x59);
2957   emit_operand(dst, src);
2958 }
2959 
2960 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2961   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2962   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2963   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2964   emit_int8(0x59);
2965   emit_int8((unsigned char)(0xC0 | encode));
2966 }
2967 
2968 void Assembler::negl(Register dst) {
2969   int encode = prefix_and_encode(dst->encoding());
2970   emit_int8((unsigned char)0xF7);
2971   emit_int8((unsigned char)(0xD8 | encode));
2972 }
2973 
2974 void Assembler::nop(int i) {
2975 #ifdef ASSERT
2976   assert(i > 0, " ");
2977   // The fancy nops aren't currently recognized by debuggers, making it a
2978   // pain to disassemble code while debugging. If asserts are on, clearly
2979   // speed is not an issue, so simply use the single-byte traditional nop
2980   // to do alignment.
2981 
2982   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2983   return;
2984 
2985 #endif // ASSERT
2986 
2987   if (UseAddressNop && VM_Version::is_intel()) {
2988     //
2989     // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2990     //  1: 0x90
2991     //  2: 0x66 0x90
2992     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2993     //  4: 0x0F 0x1F 0x40 0x00
2994     //  5: 0x0F 0x1F 0x44 0x00 0x00
2995     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2996     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2997     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2998     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2999     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3000     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3001 
3002     // The rest of the encoding is Intel-specific - don't use consecutive address nops
3003 
3004     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3005     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3006     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3007     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3008 
3009     while(i >= 15) {
3010       // For Intel don't generate consecutive address nops (mix with regular nops)
3011       i -= 15;
3012       emit_int8(0x66);   // size prefix
3013       emit_int8(0x66);   // size prefix
3014       emit_int8(0x66);   // size prefix
3015       addr_nop_8();
3016       emit_int8(0x66);   // size prefix
3017       emit_int8(0x66);   // size prefix
3018       emit_int8(0x66);   // size prefix
3019       emit_int8((unsigned char)0x90);
3020                          // nop
3021     }
3022     switch (i) {
3023       case 14:
3024         emit_int8(0x66); // size prefix
3025       case 13:
3026         emit_int8(0x66); // size prefix
3027       case 12:
3028         addr_nop_8();
3029         emit_int8(0x66); // size prefix
3030         emit_int8(0x66); // size prefix
3031         emit_int8(0x66); // size prefix
3032         emit_int8((unsigned char)0x90);
3033                          // nop
3034         break;
3035       case 11:
3036         emit_int8(0x66); // size prefix
3037       case 10:
3038         emit_int8(0x66); // size prefix
3039       case 9:
3040         emit_int8(0x66); // size prefix
3041       case 8:
3042         addr_nop_8();
3043         break;
3044       case 7:
3045         addr_nop_7();
3046         break;
3047       case 6:
3048         emit_int8(0x66); // size prefix
3049       case 5:
3050         addr_nop_5();
3051         break;
3052       case 4:
3053         addr_nop_4();
3054         break;
3055       case 3:
3056         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3057         emit_int8(0x66); // size prefix
3058       case 2:
3059         emit_int8(0x66); // size prefix
3060       case 1:
3061         emit_int8((unsigned char)0x90);
3062                          // nop
3063         break;
3064       default:
3065         assert(i == 0, " ");
3066     }
3067     return;
3068   }
3069   if (UseAddressNop && VM_Version::is_amd()) {
3070     //
3071     // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
3072     //  1: 0x90
3073     //  2: 0x66 0x90
3074     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3075     //  4: 0x0F 0x1F 0x40 0x00
3076     //  5: 0x0F 0x1F 0x44 0x00 0x00
3077     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3078     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3079     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3080     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3081     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3082     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3083 
3084     // The rest of the encoding is AMD-specific - use consecutive address nops
3085 
3086     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3087     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3088     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3089     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3090     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3091     //     Size prefixes (0x66) are added for larger sizes
3092 
3093     while(i >= 22) {
3094       i -= 11;
3095       emit_int8(0x66); // size prefix
3096       emit_int8(0x66); // size prefix
3097       emit_int8(0x66); // size prefix
3098       addr_nop_8();
3099     }
3100     // Generate the first nop for sizes 21 down to 12
3101     switch (i) {
3102       case 21:
3103         i -= 1;
3104         emit_int8(0x66); // size prefix
3105       case 20:
3106       case 19:
3107         i -= 1;
3108         emit_int8(0x66); // size prefix
3109       case 18:
3110       case 17:
3111         i -= 1;
3112         emit_int8(0x66); // size prefix
3113       case 16:
3114       case 15:
3115         i -= 8;
3116         addr_nop_8();
3117         break;
3118       case 14:
3119       case 13:
3120         i -= 7;
3121         addr_nop_7();
3122         break;
3123       case 12:
3124         i -= 6;
3125         emit_int8(0x66); // size prefix
3126         addr_nop_5();
3127         break;
3128       default:
3129         assert(i < 12, " ");
3130     }
3131 
3132     // Generate the second nop for sizes 11 down to 1
3133     switch (i) {
3134       case 11:
3135         emit_int8(0x66); // size prefix
3136       case 10:
3137         emit_int8(0x66); // size prefix
3138       case 9:
3139         emit_int8(0x66); // size prefix
3140       case 8:
3141         addr_nop_8();
3142         break;
3143       case 7:
3144         addr_nop_7();
3145         break;
3146       case 6:
3147         emit_int8(0x66); // size prefix
3148       case 5:
3149         addr_nop_5();
3150         break;
3151       case 4:
3152         addr_nop_4();
3153         break;
3154       case 3:
3155         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3156         emit_int8(0x66); // size prefix
3157       case 2:
3158         emit_int8(0x66); // size prefix
3159       case 1:
3160         emit_int8((unsigned char)0x90);
3161                          // nop
3162         break;
3163       default:
3164         assert(i == 0, " ");
3165     }
3166     return;
3167   }
3168 
3169   if (UseAddressNop && VM_Version::is_zx()) {
3170     //
3171     // Using multi-byte nops "0x0F 0x1F [address]" for ZX
3172     //  1: 0x90
3173     //  2: 0x66 0x90
3174     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3175     //  4: 0x0F 0x1F 0x40 0x00
3176     //  5: 0x0F 0x1F 0x44 0x00 0x00
3177     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3178     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3179     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3180     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3181     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3182     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3183 
3184     // The rest of the encoding is ZX-specific - don't use consecutive address nops
3185 
3186     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3187     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3188     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3189     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3190 
3191     while (i >= 15) {
3192       // For ZX don't generate consecutive address nops (mix with regular nops)
3193       i -= 15;
3194       emit_int8(0x66);   // size prefix
3195       emit_int8(0x66);   // size prefix
3196       emit_int8(0x66);   // size prefix
3197       addr_nop_8();
3198       emit_int8(0x66);   // size prefix
3199       emit_int8(0x66);   // size prefix
3200       emit_int8(0x66);   // size prefix
3201       emit_int8((unsigned char)0x90);
3202                          // nop
3203     }
3204     switch (i) {
3205       case 14:
3206         emit_int8(0x66); // size prefix
3207       case 13:
3208         emit_int8(0x66); // size prefix
3209       case 12:
3210         addr_nop_8();
3211         emit_int8(0x66); // size prefix
3212         emit_int8(0x66); // size prefix
3213         emit_int8(0x66); // size prefix
3214         emit_int8((unsigned char)0x90);
3215                          // nop
3216         break;
3217       case 11:
3218         emit_int8(0x66); // size prefix
3219       case 10:
3220         emit_int8(0x66); // size prefix
3221       case 9:
3222         emit_int8(0x66); // size prefix
3223       case 8:
3224         addr_nop_8();
3225         break;
3226       case 7:
3227         addr_nop_7();
3228         break;
3229       case 6:
3230         emit_int8(0x66); // size prefix
3231       case 5:
3232         addr_nop_5();
3233         break;
3234       case 4:
3235         addr_nop_4();
3236         break;
3237       case 3:
3238         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3239         emit_int8(0x66); // size prefix
3240       case 2:
3241         emit_int8(0x66); // size prefix
3242       case 1:
3243         emit_int8((unsigned char)0x90);
3244                          // nop
3245         break;
3246       default:
3247         assert(i == 0, " ");
3248     }
3249     return;
3250   }
3251 
3252   // Using nops with size prefixes "0x66 0x90".
3253   // From AMD Optimization Guide:
3254   //  1: 0x90
3255   //  2: 0x66 0x90
3256   //  3: 0x66 0x66 0x90
3257   //  4: 0x66 0x66 0x66 0x90
3258   //  5: 0x66 0x66 0x90 0x66 0x90
3259   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3260   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3261   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3262   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3263   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3264   //
3265   while(i > 12) {
3266     i -= 4;
3267     emit_int8(0x66); // size prefix
3268     emit_int8(0x66);
3269     emit_int8(0x66);
3270     emit_int8((unsigned char)0x90);
3271                      // nop
3272   }
3273   // 1 - 12 nops
3274   if(i > 8) {
3275     if(i > 9) {
3276       i -= 1;
3277       emit_int8(0x66);
3278     }
3279     i -= 3;
3280     emit_int8(0x66);
3281     emit_int8(0x66);
3282     emit_int8((unsigned char)0x90);
3283   }
3284   // 1 - 8 nops
3285   if(i > 4) {
3286     if(i > 6) {
3287       i -= 1;
3288       emit_int8(0x66);
3289     }
3290     i -= 3;
3291     emit_int8(0x66);
3292     emit_int8(0x66);
3293     emit_int8((unsigned char)0x90);
3294   }
3295   switch (i) {
3296     case 4:
3297       emit_int8(0x66);
3298     case 3:
3299       emit_int8(0x66);
3300     case 2:
3301       emit_int8(0x66);
3302     case 1:
3303       emit_int8((unsigned char)0x90);
3304       break;
3305     default:
3306       assert(i == 0, " ");
3307   }
3308 }
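
// A minimal usage sketch (illustrative only): callers typically use nop(i)
// to pad the code buffer out to an alignment boundary.
#if 0
static void align_to(Assembler* a, int modulus) {
  int rem = a->offset() % modulus;  // bytes emitted past the last boundary
  if (rem != 0) {
    a->nop(modulus - rem);          // pad up to the next boundary
  }
}
#endif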
3309 
3310 void Assembler::notl(Register dst) {
3311   int encode = prefix_and_encode(dst->encoding());
3312   emit_int8((unsigned char)0xF7);
3313   emit_int8((unsigned char)(0xD0 | encode));
3314 }
3315 
3316 void Assembler::orl(Address dst, int32_t imm32) {
3317   InstructionMark im(this);
3318   prefix(dst);
3319   emit_arith_operand(0x81, rcx, dst, imm32);
3320 }
3321 
3322 void Assembler::orl(Register dst, int32_t imm32) {
3323   prefix(dst);
3324   emit_arith(0x81, 0xC8, dst, imm32);
3325 }
3326 
3327 void Assembler::orl(Register dst, Address src) {
3328   InstructionMark im(this);
3329   prefix(src, dst);
3330   emit_int8(0x0B);
3331   emit_operand(dst, src);
3332 }
3333 
3334 void Assembler::orl(Register dst, Register src) {
3335   (void) prefix_and_encode(dst->encoding(), src->encoding());
3336   emit_arith(0x0B, 0xC0, dst, src);
3337 }
3338 
3339 void Assembler::orl(Address dst, Register src) {
3340   InstructionMark im(this);
3341   prefix(dst, src);
3342   emit_int8(0x09);
3343   emit_operand(src, dst);
3344 }
3345 
3346 void Assembler::packuswb(XMMRegister dst, Address src) {
3347   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3348   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3349   InstructionMark im(this);
3350   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3351   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3352   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3353   emit_int8(0x67);
3354   emit_operand(dst, src);
3355 }
3356 
3357 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
3358   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3359   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3360   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3361   emit_int8(0x67);
3362   emit_int8((unsigned char)(0xC0 | encode));
3363 }
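
// Example of the unsigned saturation performed above: each signed word is
// clamped to [0, 255] before being packed into a byte, so 0x0123 (291)
// becomes 0xFF and 0xFF80 (-128) becomes 0x00.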
3364 
3365 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3366   assert(UseAVX > 0, "some form of AVX must be enabled");
3367   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3368   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3369   emit_int8(0x67);
3370   emit_int8((unsigned char)(0xC0 | encode));
3371 }
3372 
3373 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3374   assert(VM_Version::supports_avx2(), "");
3375   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3376   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3377   emit_int8(0x00);
3378   emit_int8((unsigned char)(0xC0 | encode));
3379   emit_int8(imm8);
3380 }
3381 
3382 void Assembler::vperm2i128(XMMRegister dst,  XMMRegister nds, XMMRegister src, int imm8) {
3383   assert(VM_Version::supports_avx2(), "");
3384   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3385   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3386   emit_int8(0x46);
3387   emit_int8((unsigned char)(0xC0 | encode));
3388   emit_int8(imm8);
3389 }
3390 
3391 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3392   assert(VM_Version::supports_avx(), "");
3393   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3394   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3395   emit_int8(0x06);
3396   emit_int8((unsigned char)(0xC0 | encode));
3397   emit_int8(imm8);
3398 }
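
// For the two vperm2 forms above, imm8 selects each 128-bit half of dst:
// bits [1:0] choose the source half for dst[127:0] (0/1 = nds lower/upper,
// 2/3 = src lower/upper), bits [5:4] do the same for dst[255:128], and
// setting bit 3 or bit 7 zeroes the corresponding half instead.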
3399 
3400 
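// PAUSE (F3 90) is the spin-wait hint used in lock back-off loops; CPUs
// that predate the hint simply decode it as a prefixed NOP, so it is
// always safe to emit.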
3401 void Assembler::pause() {
3402   emit_int8((unsigned char)0xF3);
3403   emit_int8((unsigned char)0x90);
3404 }
3405 
3406 void Assembler::ud2() {
3407   emit_int8(0x0F);
3408   emit_int8(0x0B);
3409 }
3410 
3411 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3412   assert(VM_Version::supports_sse4_2(), "");
3413   InstructionMark im(this);
3414   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3415   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3416   emit_int8(0x61);
3417   emit_operand(dst, src);
3418   emit_int8(imm8);
3419 }
3420 
3421 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3422   assert(VM_Version::supports_sse4_2(), "");
3423   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3424   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3425   emit_int8(0x61);
3426   emit_int8((unsigned char)(0xC0 | encode));
3427   emit_int8(imm8);
3428 }
3429 
3430 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3431 void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
3432   assert(VM_Version::supports_sse2(), "");
3433   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3434   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3435   emit_int8(0x74);
3436   emit_int8((unsigned char)(0xC0 | encode));
3437 }
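
// Worked example for one byte lane of the compare above:
//   dst lane 0x2A vs. src lane 0x2A  ->  dst lane 0xFF (equal)
//   dst lane 0x2A vs. src lane 0x2B  ->  dst lane 0x00 (not equal)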
3438 
3439 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3440 void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3441   assert(VM_Version::supports_avx(), "");
3442   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3443   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3444   emit_int8(0x74);
3445   emit_int8((unsigned char)(0xC0 | encode));
3446 }
3447 
3448 // In this context, kdst is written with the mask used to process the equal components
3449 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3450   assert(VM_Version::supports_avx512bw(), "");
3451   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3452   attributes.set_is_evex_instruction();
3453   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3454   emit_int8(0x74);
3455   emit_int8((unsigned char)(0xC0 | encode));
3456 }
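
// Example: with vector_len == AVX_512bit the compare above covers 64 byte
// lanes; each equal lane sets the corresponding bit of kdst, so comparing
// two identical vectors yields kdst == 0xFFFFFFFFFFFFFFFF.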
3457 
3458 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3459   assert(VM_Version::supports_avx512vlbw(), "");
3460   InstructionMark im(this);
3461   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3462   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3463   attributes.set_is_evex_instruction();
3464   int dst_enc = kdst->encoding();
3465   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3466   emit_int8(0x64);
3467   emit_operand(as_Register(dst_enc), src);
3468 }
3469 
3470 void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3471   assert(is_vector_masking(), "");
3472   assert(VM_Version::supports_avx512vlbw(), "");
3473   InstructionMark im(this);
3474   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3475   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3476   attributes.reset_is_clear_context();
3477   attributes.set_embedded_opmask_register_specifier(mask);
3478   attributes.set_is_evex_instruction();
3479   int dst_enc = kdst->encoding();
3480   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3481   emit_int8(0x64);
3482   emit_operand(as_Register(dst_enc), src);
3483 }
3484 
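// The vcc immediate below is the standard EVEX integer comparison
// predicate: 0 = eq, 1 = lt, 2 = le, 3 = false, 4 = neq, 5 = nlt (ge),
// 6 = nle (gt), 7 = true.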
3485 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3486   assert(VM_Version::supports_avx512vlbw(), "");
3487   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3488   attributes.set_is_evex_instruction();
3489   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3490   emit_int8(0x3E);
3491   emit_int8((unsigned char)(0xC0 | encode));
3492   emit_int8(vcc);
3493 }
3494 
3495 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3496   assert(is_vector_masking(), "");
3497   assert(VM_Version::supports_avx512vlbw(), "");
3498   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3499   attributes.reset_is_clear_context();
3500   attributes.set_embedded_opmask_register_specifier(mask);
3501   attributes.set_is_evex_instruction();
3502   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3503   emit_int8(0x3E);
3504   emit_int8((unsigned char)(0xC0 | encode));
3505   emit_int8(vcc);
3506 }
3507 
3508 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3509   assert(VM_Version::supports_avx512vlbw(), "");
3510   InstructionMark im(this);
3511   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3512   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3513   attributes.set_is_evex_instruction();
3514   int dst_enc = kdst->encoding();
3515   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3516   emit_int8(0x3E);
3517   emit_operand(as_Register(dst_enc), src);
3518   emit_int8(vcc);
3519 }
3520 
3521 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3522   assert(VM_Version::supports_avx512bw(), "");
3523   InstructionMark im(this);
3524   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3525   attributes.set_is_evex_instruction();
3526   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3527   int dst_enc = kdst->encoding();
3528   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3529   emit_int8(0x74);
3530   emit_operand(as_Register(dst_enc), src);
3531 }
3532 
3533 void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3534   assert(VM_Version::supports_avx512vlbw(), "");
3535   assert(is_vector_masking(), "");    // For stub code use only
3536   InstructionMark im(this);
3537   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3538   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3539   attributes.reset_is_clear_context();
3540   attributes.set_embedded_opmask_register_specifier(mask);
3541   attributes.set_is_evex_instruction();
3542   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3543   emit_int8(0x74);
3544   emit_operand(as_Register(kdst->encoding()), src);
3545 }
3546 
3547 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3548 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3549   assert(VM_Version::supports_sse2(), "");
3550   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3551   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3552   emit_int8(0x75);
3553   emit_int8((unsigned char)(0xC0 | encode));
3554 }
3555 
3556 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3557 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3558   assert(VM_Version::supports_avx(), "");
3559   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3560   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3561   emit_int8(0x75);
3562   emit_int8((unsigned char)(0xC0 | encode));
3563 }
3564 
3565 // In this context, kdst is written with the mask used to process the equal components
3566 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3567   assert(VM_Version::supports_avx512bw(), "");
3568   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3569   attributes.set_is_evex_instruction();
3570   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3571   emit_int8(0x75);
3572   emit_int8((unsigned char)(0xC0 | encode));
3573 }
3574 
3575 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3576   assert(VM_Version::supports_avx512bw(), "");
3577   InstructionMark im(this);
3578   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3579   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3580   attributes.set_is_evex_instruction();
3581   int dst_enc = kdst->encoding();
3582   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3583   emit_int8(0x75);
3584   emit_operand(as_Register(dst_enc), src);
3585 }
3586 
3587 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3588 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3589   assert(VM_Version::supports_sse2(), "");
3590   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3591   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3592   emit_int8(0x76);
3593   emit_int8((unsigned char)(0xC0 | encode));
3594 }
3595 
3596 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3597 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3598   assert(VM_Version::supports_avx(), "");
3599   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3600   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3601   emit_int8(0x76);
3602   emit_int8((unsigned char)(0xC0 | encode));
3603 }
3604 
3605 // In this context, kdst is written with the mask used to process the equal components
3606 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3607   assert(VM_Version::supports_evex(), "");
3608   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3609   attributes.set_is_evex_instruction();
3610   attributes.reset_is_clear_context();
3611   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3612   emit_int8(0x76);
3613   emit_int8((unsigned char)(0xC0 | encode));
3614 }
3615 
3616 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3617   assert(VM_Version::supports_evex(), "");
3618   InstructionMark im(this);
3619   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3620   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3621   attributes.reset_is_clear_context();
3622   attributes.set_is_evex_instruction();
3623   int dst_enc = kdst->encoding();
3624   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3625   emit_int8(0x76);
3626   emit_operand(as_Register(dst_enc), src);
3627 }
3628 
3629 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3630 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3631   assert(VM_Version::supports_sse4_1(), "");
3632   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3633   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3634   emit_int8(0x29);
3635   emit_int8((unsigned char)(0xC0 | encode));
3636 }
3637 
3638 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3639 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3640   assert(VM_Version::supports_avx(), "");
3641   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3642   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3643   emit_int8(0x29);
3644   emit_int8((unsigned char)(0xC0 | encode));
3645 }
3646 
3647 // In this context, kdst is written with the mask used to process the equal components
3648 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3649   assert(VM_Version::supports_evex(), "");
3650   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3651   attributes.reset_is_clear_context();
3652   attributes.set_is_evex_instruction();
3653   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3654   emit_int8(0x29);
3655   emit_int8((unsigned char)(0xC0 | encode));
3656 }
3657 
3658 // In this context, kdst is written with the mask used to process the equal components
3659 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3660   assert(VM_Version::supports_evex(), "");
3661   InstructionMark im(this);
3662   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3663   attributes.reset_is_clear_context();
3664   attributes.set_is_evex_instruction();
3665   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
3666   int dst_enc = kdst->encoding();
3667   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3668   emit_int8(0x29);
3669   emit_operand(as_Register(dst_enc), src);
3670 }
3671 
3672 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3673   assert(VM_Version::supports_sse2(), "");
3674   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3675   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3676   emit_int8((unsigned char)0xD7);
3677   emit_int8((unsigned char)(0xC0 | encode));
3678 }
3679 
3680 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3681   assert(VM_Version::supports_avx2(), "");
3682   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3683   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3684   emit_int8((unsigned char)0xD7);
3685   emit_int8((unsigned char)(0xC0 | encode));
3686 }
3687 
3688 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3689   assert(VM_Version::supports_sse4_1(), "");
3690   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3691   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3692   emit_int8(0x16);
3693   emit_int8((unsigned char)(0xC0 | encode));
3694   emit_int8(imm8);
3695 }
3696 
3697 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
3698   assert(VM_Version::supports_sse4_1(), "");
3699   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3700   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3701   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3702   emit_int8(0x16);
3703   emit_operand(src, dst);
3704   emit_int8(imm8);
3705 }
3706 
3707 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3708   assert(VM_Version::supports_sse4_1(), "");
3709   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3710   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3711   emit_int8(0x16);
3712   emit_int8((unsigned char)(0xC0 | encode));
3713   emit_int8(imm8);
3714 }
3715 
3716 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
3717   assert(VM_Version::supports_sse4_1(), "");
3718   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3719   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3720   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3721   emit_int8(0x16);
3722   emit_operand(src, dst);
3723   emit_int8(imm8);
3724 }
3725 
3726 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3727   assert(VM_Version::supports_sse2(), "");
3728   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3729   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3730   emit_int8((unsigned char)0xC5);
3731   emit_int8((unsigned char)(0xC0 | encode));
3732   emit_int8(imm8);
3733 }
3734 
3735 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
3736   assert(VM_Version::supports_sse4_1(), "");
3737   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3738   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3739   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3740   emit_int8((unsigned char)0x15);
3741   emit_operand(src, dst);
3742   emit_int8(imm8);
3743 }
3744 
3745 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
3746   assert(VM_Version::supports_sse4_1(), "");
3747   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3748   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3749   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3750   emit_int8(0x14);
3751   emit_operand(src, dst);
3752   emit_int8(imm8);
3753 }
3754 
3755 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3756   assert(VM_Version::supports_sse4_1(), "");
3757   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3758   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3759   emit_int8(0x22);
3760   emit_int8((unsigned char)(0xC0 | encode));
3761   emit_int8(imm8);
3762 }
3763 
3764 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
3765   assert(VM_Version::supports_sse4_1(), "");
3766   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3767   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3768   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3769   emit_int8(0x22);
3770   emit_operand(dst,src);
3771   emit_int8(imm8);
3772 }
3773 
3774 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3775   assert(VM_Version::supports_sse4_1(), "");
3776   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3777   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3778   emit_int8(0x22);
3779   emit_int8((unsigned char)(0xC0 | encode));
3780   emit_int8(imm8);
3781 }
3782 
3783 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
3784   assert(VM_Version::supports_sse4_1(), "");
3785   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3786   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3787   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3788   emit_int8(0x22);
3789   emit_operand(dst, src);
3790   emit_int8(imm8);
3791 }
3792 
3793 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3794   assert(VM_Version::supports_sse2(), "");
3795   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3796   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3797   emit_int8((unsigned char)0xC4);
3798   emit_int8((unsigned char)(0xC0 | encode));
3799   emit_int8(imm8);
3800 }
3801 
3802 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
3803   assert(VM_Version::supports_sse2(), "");
3804   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3805   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3806   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3807   emit_int8((unsigned char)0xC4);
3808   emit_operand(dst, src);
3809   emit_int8(imm8);
3810 }
3811 
3812 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
3813   assert(VM_Version::supports_sse4_1(), "");
3814   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3815   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3816   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3817   emit_int8(0x20);
3818   emit_operand(dst, src);
3819   emit_int8(imm8);
3820 }
3821 
3822 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3823   assert(VM_Version::supports_sse4_1(), "");
3824   InstructionMark im(this);
3825   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3826   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3827   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3828   emit_int8(0x30);
3829   emit_operand(dst, src);
3830 }
3831 
3832 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3833   assert(VM_Version::supports_sse4_1(), "");
3834   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3835   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3836   emit_int8(0x30);
3837   emit_int8((unsigned char)(0xC0 | encode));
3838 }
3839 
3840 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3841   assert(VM_Version::supports_avx(), "");
3842   InstructionMark im(this);
3843   assert(dst != xnoreg, "sanity");
3844   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3845   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3846   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3847   emit_int8(0x30);
3848   emit_operand(dst, src);
3849 }
3850 
3851 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
3852   assert(is_vector_masking(), "");
3853   assert(VM_Version::supports_avx512vlbw(), "");
3854   assert(dst != xnoreg, "sanity");
3855   InstructionMark im(this);
3856   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3857   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3858   attributes.set_embedded_opmask_register_specifier(mask);
3859   attributes.set_is_evex_instruction();
3860   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3861   emit_int8(0x30);
3862   emit_operand(dst, src);
3863 }
3864 
3865 void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
3866   assert(VM_Version::supports_avx512vlbw(), "");
3867   assert(src != xnoreg, "sanity");
3868   InstructionMark im(this);
3869   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3870   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3871   attributes.set_is_evex_instruction();
3872   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3873   emit_int8(0x30);
3874   emit_operand(src, dst);
3875 }
3876 
3877 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
3878   assert(is_vector_masking(), "");
3879   assert(VM_Version::supports_avx512vlbw(), "");
3880   assert(src != xnoreg, "sanity");
3881   InstructionMark im(this);
3882   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3883   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3884   attributes.reset_is_clear_context();
3885   attributes.set_embedded_opmask_register_specifier(mask);
3886   attributes.set_is_evex_instruction();
3887   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3888   emit_int8(0x30);
3889   emit_operand(src, dst);
3890 }
3891 
3892 // generic
3893 void Assembler::pop(Register dst) {
3894   int encode = prefix_and_encode(dst->encoding());
3895   emit_int8(0x58 | encode);
3896 }
3897 
3898 void Assembler::popcntl(Register dst, Address src) {
3899   assert(VM_Version::supports_popcnt(), "must support");
3900   InstructionMark im(this);
3901   emit_int8((unsigned char)0xF3);
3902   prefix(src, dst);
3903   emit_int8(0x0F);
3904   emit_int8((unsigned char)0xB8);
3905   emit_operand(dst, src);
3906 }
3907 
3908 void Assembler::popcntl(Register dst, Register src) {
3909   assert(VM_Version::supports_popcnt(), "must support");
3910   emit_int8((unsigned char)0xF3);
3911   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3912   emit_int8(0x0F);
3913   emit_int8((unsigned char)0xB8);
3914   emit_int8((unsigned char)(0xC0 | encode));
3915 }
3916 
3917 void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
3918   assert(VM_Version::supports_vpopcntdq(), "must support vpopcntdq feature");
3919   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3920   attributes.set_is_evex_instruction();
3921   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3922   emit_int8(0x55);
3923   emit_int8((unsigned char)(0xC0 | encode));
3924 }
3925 
3926 void Assembler::popf() {
3927   emit_int8((unsigned char)0x9D);
3928 }
3929 
3930 #ifndef _LP64 // no 32-bit push/pop on amd64
3931 void Assembler::popl(Address dst) {
3932   // NOTE: this would adjust the stack by 8 bytes on 64-bit
3933   InstructionMark im(this);
3934   prefix(dst);
3935   emit_int8((unsigned char)0x8F);
3936   emit_operand(rax, dst);
3937 }
3938 #endif
3939 
3940 void Assembler::prefetch_prefix(Address src) {
3941   prefix(src);
3942   emit_int8(0x0F);
3943 }
3944 
3945 void Assembler::prefetchnta(Address src) {
3946   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3947   InstructionMark im(this);
3948   prefetch_prefix(src);
3949   emit_int8(0x18);
3950   emit_operand(rax, src); // 0, src
3951 }
3952 
3953 void Assembler::prefetchr(Address src) {
3954   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3955   InstructionMark im(this);
3956   prefetch_prefix(src);
3957   emit_int8(0x0D);
3958   emit_operand(rax, src); // 0, src
3959 }
3960 
3961 void Assembler::prefetcht0(Address src) {
3962   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3963   InstructionMark im(this);
3964   prefetch_prefix(src);
3965   emit_int8(0x18);
3966   emit_operand(rcx, src); // 1, src
3967 }
3968 
3969 void Assembler::prefetcht1(Address src) {
3970   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3971   InstructionMark im(this);
3972   prefetch_prefix(src);
3973   emit_int8(0x18);
3974   emit_operand(rdx, src); // 2, src
3975 }
3976 
3977 void Assembler::prefetcht2(Address src) {
3978   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3979   InstructionMark im(this);
3980   prefetch_prefix(src);
3981   emit_int8(0x18);
3982   emit_operand(rbx, src); // 3, src
3983 }
3984 
3985 void Assembler::prefetchw(Address src) {
3986   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3987   InstructionMark im(this);
3988   prefetch_prefix(src);
3989   emit_int8(0x0D);
3990   emit_operand(rcx, src); // 1, src
3991 }
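
// In the prefetch emitters above, the general-purpose register handed to
// emit_operand() is not a real operand: its encoding fills the ModRM reg
// field, i.e. the /digit opcode extension selecting the hint. For 0F 18,
// /0 = prefetchnta, /1 = prefetcht0, /2 = prefetcht1, /3 = prefetcht2; for
// the 3DNow! opcode 0F 0D, /0 = prefetch and /1 = prefetchw. Sketch:
//
//   __ prefetcht0(Address(rsi, 64));   // 0F 18 4E 40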
3992 
3993 void Assembler::prefix(Prefix p) {
3994   emit_int8(p);
3995 }
3996 
3997 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
3998   assert(VM_Version::supports_ssse3(), "");
3999   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4000   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4001   emit_int8(0x00);
4002   emit_int8((unsigned char)(0xC0 | encode));
4003 }
4004 
4005 void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4006   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4007          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4008          0, "");
4009   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4010   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4011   emit_int8(0x00);
4012   emit_int8((unsigned char)(0xC0 | encode));
4013 }
4014 
4015 void Assembler::pshufb(XMMRegister dst, Address src) {
4016   assert(VM_Version::supports_ssse3(), "");
4017   InstructionMark im(this);
4018   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4019   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4020   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4021   emit_int8(0x00);
4022   emit_operand(dst, src);
4023 }
4024 
4025 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
4026   assert(isByte(mode), "invalid value");
4027   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4028   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
4029   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4030   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4031   emit_int8(0x70);
4032   emit_int8((unsigned char)(0xC0 | encode));
4033   emit_int8(mode & 0xFF);
4034 }
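
// The trailing imm8 holds four 2-bit selectors: bits [1:0] pick the source
// dword copied into dst[0], bits [3:2] into dst[1], and so on. Sketch:
//
//   __ pshufd(xmm0, xmm1, 0x1B);   // 0b00'01'10'11 reverses the four dwords
//   __ pshufd(xmm0, xmm1, 0x00);   // broadcasts source dword 0 to all lanes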
4035 
4036 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
4037   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4038          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4039          0, "");
4040   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4041   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4042   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4043   emit_int8(0x70);
4044   emit_int8((unsigned char)(0xC0 | encode));
4045   emit_int8(mode & 0xFF);
4046 }
4047 
4048 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
4049   assert(isByte(mode), "invalid value");
4050   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4051   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4052   InstructionMark im(this);
4053   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4054   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4055   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4056   emit_int8(0x70);
4057   emit_operand(dst, src);
4058   emit_int8(mode & 0xFF);
4059 }
4060 
4061 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
4062   assert(isByte(mode), "invalid value");
4063   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4064   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4065   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4066   emit_int8(0x70);
4067   emit_int8((unsigned char)(0xC0 | encode));
4068   emit_int8(mode & 0xFF);
4069 }
4070 
4071 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
4072   assert(isByte(mode), "invalid value");
4073   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4074   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4075   InstructionMark im(this);
4076   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4077   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4078   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4079   emit_int8(0x70);
4080   emit_operand(dst, src);
4081   emit_int8(mode & 0xFF);
4082 }
4083 
4084 void Assembler::psrldq(XMMRegister dst, int shift) {
4085   // Shift right the 128-bit value in dst XMMRegister by shift number of bytes; xmm3 below supplies the /3 opcode extension (66 0F 73 /3 ib).
4086   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4087   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4088   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4089   emit_int8(0x73);
4090   emit_int8((unsigned char)(0xC0 | encode));
4091   emit_int8(shift);
4092 }
4093 
4094 void Assembler::pslldq(XMMRegister dst, int shift) {
4095   // Shift left the 128-bit value in dst XMMRegister by shift number of bytes.
4096   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4097   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4098   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
4099   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4100   emit_int8(0x73);
4101   emit_int8((unsigned char)(0xC0 | encode));
4102   emit_int8(shift);
4103 }
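
// psrldq and pslldq share opcode 66 0F 73 ib and differ only in the /digit
// opcode extension, so a dummy XMM register (xmm3 for /3, xmm7 for /7) is
// passed where a source register would normally go. Sketch:
//
//   __ pslldq(xmm1, 8);   // 66 0F 73 F9 08   (ModRM = C0 | 7<<3 | 1)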
4104 
4105 void Assembler::ptest(XMMRegister dst, Address src) {
4106   assert(VM_Version::supports_sse4_1(), "");
4107   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4108   InstructionMark im(this);
4109   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4110   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4111   emit_int8(0x17);
4112   emit_operand(dst, src);
4113 }
4114 
4115 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
4116   assert(VM_Version::supports_sse4_1(), "");
4117   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4118   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4119   emit_int8(0x17);
4120   emit_int8((unsigned char)(0xC0 | encode));
4121 }
4122 
4123 void Assembler::vptest(XMMRegister dst, Address src) {
4124   assert(VM_Version::supports_avx(), "");
4125   InstructionMark im(this);
4126   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4127   assert(dst != xnoreg, "sanity");
4128   // swap src<->dst for encoding
4129   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4130   emit_int8(0x17);
4131   emit_operand(dst, src);
4132 }
4133 
4134 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
4135   assert(VM_Version::supports_avx(), "");
4136   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4137   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4138   emit_int8(0x17);
4139   emit_int8((unsigned char)(0xC0 | encode));
4140 }
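
// ptest/vptest are non-destructive: ZF is set when (dst AND src) is all
// zeroes, CF when (src AND NOT dst) is all zeroes, and neither register is
// written. The usual "is this register all zero?" idiom is (sketch only):
//
//   __ ptest(xmm0, xmm0);
//   __ jcc(Assembler::zero, L_is_zero);   // taken when xmm0 == 0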
4141 
4142 void Assembler::punpcklbw(XMMRegister dst, Address src) {
4143   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4144   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4145   InstructionMark im(this);
4146   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4147   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4148   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4149   emit_int8(0x60);
4150   emit_operand(dst, src);
4151 }
4152 
4153 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
4154   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4155   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4156   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4157   emit_int8(0x60);
4158   emit_int8((unsigned char)(0xC0 | encode));
4159 }
4160 
4161 void Assembler::punpckldq(XMMRegister dst, Address src) {
4162   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4163   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4164   InstructionMark im(this);
4165   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4166   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4167   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4168   emit_int8(0x62);
4169   emit_operand(dst, src);
4170 }
4171 
4172 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
4173   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4174   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4175   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4176   emit_int8(0x62);
4177   emit_int8((unsigned char)(0xC0 | encode));
4178 }
4179 
4180 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
4181   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4182   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4183   attributes.set_rex_vex_w_reverted();
4184   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4185   emit_int8(0x6C);
4186   emit_int8((unsigned char)(0xC0 | encode));
4187 }
4188 
4189 void Assembler::push(int32_t imm32) {
4190   // in 64-bit mode we push 64 bits onto the stack but take
4191   // only a sign-extended 32-bit immediate
4192   emit_int8(0x68);
4193   emit_int32(imm32);
4194 }
4195 
4196 void Assembler::push(Register src) {
4197   int encode = prefix_and_encode(src->encoding());
4198 
4199   emit_int8(0x50 | encode);
4200 }
4201 
4202 void Assembler::pushf() {
4203   emit_int8((unsigned char)0x9C);
4204 }
4205 
4206 #ifndef _LP64 // no 32bit push/pop on amd64
4207 void Assembler::pushl(Address src) {
4208   // 32-bit only: a 4-byte push is not encodable in 64-bit mode
4209   InstructionMark im(this);
4210   prefix(src);
4211   emit_int8((unsigned char)0xFF);
4212   emit_operand(rsi, src);
4213 }
4214 #endif
4215 
4216 void Assembler::rcll(Register dst, int imm8) {
4217   assert(isShiftCount(imm8), "illegal shift count");
4218   int encode = prefix_and_encode(dst->encoding());
4219   if (imm8 == 1) {
4220     emit_int8((unsigned char)0xD1);
4221     emit_int8((unsigned char)(0xD0 | encode));
4222   } else {
4223     emit_int8((unsigned char)0xC1);
4224     emit_int8((unsigned char)(0xD0 | encode));
4225     emit_int8(imm8);
4226   }
4227 }
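
// Like the other shift/rotate-by-immediate emitters, rcll uses the one-byte
// shorter D1 /2 form for a count of 1 and C1 /2 ib otherwise. Sketch:
//
//   __ rcll(rax, 1);   // D1 D0
//   __ rcll(rax, 4);   // C1 D0 04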
4228 
4229 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
4230   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4231   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4232   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4233   emit_int8(0x53);
4234   emit_int8((unsigned char)(0xC0 | encode));
4235 }
4236 
4237 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
4238   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4239   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4240   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4241   emit_int8(0x53);
4242   emit_int8((unsigned char)(0xC0 | encode));
4243 }
4244 
4245 void Assembler::rdtsc() {
4246   emit_int8((unsigned char)0x0F);
4247   emit_int8((unsigned char)0x31);
4248 }
4249 
4250 // copies rcx pointer-sized words from [esi] to [edi]
4251 // generic
4252 void Assembler::rep_mov() {
4253   emit_int8((unsigned char)0xF3);
4254   // MOVSQ
4255   LP64_ONLY(prefix(REX_W));
4256   emit_int8((unsigned char)0xA5);
4257 }
4258 
4259 // stores al into rcx consecutive bytes starting at [edi]
4260 void Assembler::rep_stosb() {
4261   emit_int8((unsigned char)0xF3); // REP
4262   LP64_ONLY(prefix(REX_W));
4263   emit_int8((unsigned char)0xAA); // STOSB
4264 }
4265 
4266 // stores rax into rcx pointer-sized words starting at [edi]
4267 // generic
4268 void Assembler::rep_stos() {
4269   emit_int8((unsigned char)0xF3); // REP
4270   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
4271   emit_int8((unsigned char)0xAB);
4272 }
4273 
4274 // scans rcx pointer-sized words at [edi] for an occurrence of rax
4275 // generic
4276 void Assembler::repne_scan() {
4277   emit_int8((unsigned char)0xF2);
4278   // SCASQ
4279   LP64_ONLY(prefix(REX_W));
4280   emit_int8((unsigned char)0xAF);
4281 }
4282 
4283 #ifdef _LP64
4284 // scans rcx 4-byte words at [edi] for an occurrence of eax
4285 // generic
4286 void Assembler::repne_scanl() {
4287   emit_int8((unsigned char)0xF2);
4288   // SCASL
4289   emit_int8((unsigned char)0xAF);
4290 }
4291 #endif
4292 
4293 void Assembler::ret(int imm16) {
4294   if (imm16 == 0) {
4295     emit_int8((unsigned char)0xC3);
4296   } else {
4297     emit_int8((unsigned char)0xC2);
4298     emit_int16(imm16);
4299   }
4300 }
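
// ret() picks between the one-byte near return and the "ret imm16" form that
// additionally pops imm16 bytes of stack after the return address. Sketch:
//
//   __ ret(0);   // C3
//   __ ret(8);   // C2 08 00   (callee pops 8 bytes of arguments)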
4301 
4302 void Assembler::sahf() {
4303 #ifdef _LP64
4304   // Not supported in 64bit mode
4305   ShouldNotReachHere();
4306 #endif
4307   emit_int8((unsigned char)0x9E);
4308 }
4309 
4310 void Assembler::sarl(Register dst, int imm8) {
4311   int encode = prefix_and_encode(dst->encoding());
4312   assert(isShiftCount(imm8), "illegal shift count");
4313   if (imm8 == 1) {
4314     emit_int8((unsigned char)0xD1);
4315     emit_int8((unsigned char)(0xF8 | encode));
4316   } else {
4317     emit_int8((unsigned char)0xC1);
4318     emit_int8((unsigned char)(0xF8 | encode));
4319     emit_int8(imm8);
4320   }
4321 }
4322 
4323 void Assembler::sarl(Register dst) {
4324   int encode = prefix_and_encode(dst->encoding());
4325   emit_int8((unsigned char)0xD3);
4326   emit_int8((unsigned char)(0xF8 | encode));
4327 }
4328 
4329 void Assembler::sbbl(Address dst, int32_t imm32) {
4330   InstructionMark im(this);
4331   prefix(dst);
4332   emit_arith_operand(0x81, rbx, dst, imm32);
4333 }
4334 
4335 void Assembler::sbbl(Register dst, int32_t imm32) {
4336   prefix(dst);
4337   emit_arith(0x81, 0xD8, dst, imm32);
4338 }
4339 
4340 
4341 void Assembler::sbbl(Register dst, Address src) {
4342   InstructionMark im(this);
4343   prefix(src, dst);
4344   emit_int8(0x1B);
4345   emit_operand(dst, src);
4346 }
4347 
4348 void Assembler::sbbl(Register dst, Register src) {
4349   (void) prefix_and_encode(dst->encoding(), src->encoding());
4350   emit_arith(0x1B, 0xC0, dst, src);
4351 }
4352 
4353 void Assembler::setb(Condition cc, Register dst) {
4354   assert(0 <= cc && cc < 16, "illegal cc");
4355   int encode = prefix_and_encode(dst->encoding(), true);
4356   emit_int8(0x0F);
4357   emit_int8((unsigned char)(0x90 | cc));
4358   emit_int8((unsigned char)(0xC0 | encode));
4359 }
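
// SETcc is 0F (90 + cc) with the byte destination in ModRM r/m. Passing
// 'true' to prefix_and_encode() forces a REX prefix where required so that
// sil/dil/bpl/spl are reachable instead of ah/ch/dh/bh. Sketch:
//
//   __ setb(Assembler::equal, rdx);   // 0F 94 C2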
4360 
4361 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
4362   assert(VM_Version::supports_ssse3(), "");
4363   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
4364   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4365   emit_int8((unsigned char)0x0F);
4366   emit_int8((unsigned char)(0xC0 | encode));
4367   emit_int8(imm8);
4368 }
4369 
4370 void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4371   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4372          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4373          0, "");
4374   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
4375   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4376   emit_int8((unsigned char)0x0F);
4377   emit_int8((unsigned char)(0xC0 | encode));
4378   emit_int8(imm8);
4379 }
4380 
4381 void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
4382   assert(VM_Version::supports_sse4_1(), "");
4383   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4384   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4385   emit_int8((unsigned char)0x0E);
4386   emit_int8((unsigned char)(0xC0 | encode));
4387   emit_int8(imm8);
4388 }
4389 
4390 void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
4391   assert(VM_Version::supports_sha(), "");
4392   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
4393   emit_int8((unsigned char)0xCC);
4394   emit_int8((unsigned char)(0xC0 | encode));
4395   emit_int8((unsigned char)imm8);
4396 }
4397 
4398 void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
4399   assert(VM_Version::supports_sha(), "");
4400   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4401   emit_int8((unsigned char)0xC8);
4402   emit_int8((unsigned char)(0xC0 | encode));
4403 }
4404 
4405 void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
4406   assert(VM_Version::supports_sha(), "");
4407   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4408   emit_int8((unsigned char)0xC9);
4409   emit_int8((unsigned char)(0xC0 | encode));
4410 }
4411 
4412 void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
4413   assert(VM_Version::supports_sha(), "");
4414   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4415   emit_int8((unsigned char)0xCA);
4416   emit_int8((unsigned char)(0xC0 | encode));
4417 }
4418 
4419 // xmm0 is implicit additional source to this instruction.
4420 void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
4421   assert(VM_Version::supports_sha(), "");
4422   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4423   emit_int8((unsigned char)0xCB);
4424   emit_int8((unsigned char)(0xC0 | encode));
4425 }
4426 
4427 void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
4428   assert(VM_Version::supports_sha(), "");
4429   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4430   emit_int8((unsigned char)0xCC);
4431   emit_int8((unsigned char)(0xC0 | encode));
4432 }
4433 
4434 void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
4435   assert(VM_Version::supports_sha(), "");
4436   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4437   emit_int8((unsigned char)0xCD);
4438   emit_int8((unsigned char)(0xC0 | encode));
4439 }
4440 
4441 
4442 void Assembler::shll(Register dst, int imm8) {
4443   assert(isShiftCount(imm8), "illegal shift count");
4444   int encode = prefix_and_encode(dst->encoding());
4445   if (imm8 == 1) {
4446     emit_int8((unsigned char)0xD1);
4447     emit_int8((unsigned char)(0xE0 | encode));
4448   } else {
4449     emit_int8((unsigned char)0xC1);
4450     emit_int8((unsigned char)(0xE0 | encode));
4451     emit_int8(imm8);
4452   }
4453 }
4454 
4455 void Assembler::shll(Register dst) {
4456   int encode = prefix_and_encode(dst->encoding());
4457   emit_int8((unsigned char)0xD3);
4458   emit_int8((unsigned char)(0xE0 | encode));
4459 }
4460 
4461 void Assembler::shrl(Register dst, int imm8) {
4462   assert(isShiftCount(imm8), "illegal shift count");
4463   int encode = prefix_and_encode(dst->encoding());
4464   emit_int8((unsigned char)0xC1);
4465   emit_int8((unsigned char)(0xE8 | encode));
4466   emit_int8(imm8);
4467 }
4468 
4469 void Assembler::shrl(Register dst) {
4470   int encode = prefix_and_encode(dst->encoding());
4471   emit_int8((unsigned char)0xD3);
4472   emit_int8((unsigned char)(0xE8 | encode));
4473 }
4474 
4475 // copies a single doubleword (32 bits) from [esi] to [edi]
4476 void Assembler::smovl() {
4477   emit_int8((unsigned char)0xA5);
4478 }
4479 
4480 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
4481   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4482   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4483   attributes.set_rex_vex_w_reverted();
4484   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4485   emit_int8(0x51);
4486   emit_int8((unsigned char)(0xC0 | encode));
4487 }
4488 
4489 void Assembler::sqrtsd(XMMRegister dst, Address src) {
4490   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4491   InstructionMark im(this);
4492   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4493   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4494   attributes.set_rex_vex_w_reverted();
4495   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4496   emit_int8(0x51);
4497   emit_operand(dst, src);
4498 }
4499 
4500 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
4501   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4502   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4503   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4504   emit_int8(0x51);
4505   emit_int8((unsigned char)(0xC0 | encode));
4506 }
4507 
4508 void Assembler::std() {
4509   emit_int8((unsigned char)0xFD);
4510 }
4511 
4512 void Assembler::sqrtss(XMMRegister dst, Address src) {
4513   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4514   InstructionMark im(this);
4515   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4516   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4517   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4518   emit_int8(0x51);
4519   emit_operand(dst, src);
4520 }
4521 
4522 void Assembler::stmxcsr(Address dst) {
4523   if (UseAVX > 0) {
4524     assert(VM_Version::supports_avx(), "");
4525     InstructionMark im(this);
4526     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4527     vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4528     emit_int8((unsigned char)0xAE);
4529     emit_operand(as_Register(3), dst);
4530   } else {
4531     NOT_LP64(assert(VM_Version::supports_sse(), ""));
4532     InstructionMark im(this);
4533     prefix(dst);
4534     emit_int8(0x0F);
4535     emit_int8((unsigned char)0xAE);
4536     emit_operand(as_Register(3), dst);
4537   }
4538 }
4539 
4540 void Assembler::subl(Address dst, int32_t imm32) {
4541   InstructionMark im(this);
4542   prefix(dst);
4543   emit_arith_operand(0x81, rbp, dst, imm32);
4544 }
4545 
4546 void Assembler::subl(Address dst, Register src) {
4547   InstructionMark im(this);
4548   prefix(dst, src);
4549   emit_int8(0x29);
4550   emit_operand(src, dst);
4551 }
4552 
4553 void Assembler::subl(Register dst, int32_t imm32) {
4554   prefix(dst);
4555   emit_arith(0x81, 0xE8, dst, imm32);
4556 }
4557 
4558 // Force generation of a 4-byte immediate value even if it fits into 8 bits
4559 void Assembler::subl_imm32(Register dst, int32_t imm32) {
4560   prefix(dst);
4561   emit_arith_imm32(0x81, 0xE8, dst, imm32);
4562 }
4563 
4564 void Assembler::subl(Register dst, Address src) {
4565   InstructionMark im(this);
4566   prefix(src, dst);
4567   emit_int8(0x2B);
4568   emit_operand(dst, src);
4569 }
4570 
4571 void Assembler::subl(Register dst, Register src) {
4572   (void) prefix_and_encode(dst->encoding(), src->encoding());
4573   emit_arith(0x2B, 0xC0, dst, src);
4574 }
4575 
4576 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
4577   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4578   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4579   attributes.set_rex_vex_w_reverted();
4580   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4581   emit_int8(0x5C);
4582   emit_int8((unsigned char)(0xC0 | encode));
4583 }
4584 
4585 void Assembler::subsd(XMMRegister dst, Address src) {
4586   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4587   InstructionMark im(this);
4588   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4589   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4590   attributes.set_rex_vex_w_reverted();
4591   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4592   emit_int8(0x5C);
4593   emit_operand(dst, src);
4594 }
4595 
4596 void Assembler::subss(XMMRegister dst, XMMRegister src) {
4597   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4598   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ false);
4599   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4600   emit_int8(0x5C);
4601   emit_int8((unsigned char)(0xC0 | encode));
4602 }
4603 
4604 void Assembler::subss(XMMRegister dst, Address src) {
4605   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4606   InstructionMark im(this);
4607   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4608   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4609   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4610   emit_int8(0x5C);
4611   emit_operand(dst, src);
4612 }
4613 
4614 void Assembler::testb(Register dst, int imm8) {
4615   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
4616   (void) prefix_and_encode(dst->encoding(), true);
4617   emit_arith_b(0xF6, 0xC0, dst, imm8);
4618 }
4619 
4620 void Assembler::testb(Address dst, int imm8) {
4621   InstructionMark im(this);
4622   prefix(dst);
4623   emit_int8((unsigned char)0xF6);
4624   emit_operand(rax, dst, 1);
4625   emit_int8(imm8);
4626 }
4627 
4628 void Assembler::testl(Register dst, int32_t imm32) {
4629   // not using emit_arith because test
4630   // doesn't support sign-extension of
4631   // 8bit operands
4632   int encode = dst->encoding();
4633   if (encode == 0) {
4634     emit_int8((unsigned char)0xA9);
4635   } else {
4636     encode = prefix_and_encode(encode);
4637     emit_int8((unsigned char)0xF7);
4638     emit_int8((unsigned char)(0xC0 | encode));
4639   }
4640   emit_int32(imm32);
4641 }
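
// testl has no sign-extended imm8 form, hence emit_arith() is bypassed, but
// eax still gets the short A9 id encoding with no ModRM byte. Sketch:
//
//   __ testl(rax, 0x100);   // A9 00 01 00 00
//   __ testl(rcx, 0x100);   // F7 C1 00 01 00 00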
4642 
4643 void Assembler::testl(Register dst, Register src) {
4644   (void) prefix_and_encode(dst->encoding(), src->encoding());
4645   emit_arith(0x85, 0xC0, dst, src);
4646 }
4647 
4648 void Assembler::testl(Register dst, Address src) {
4649   InstructionMark im(this);
4650   prefix(src, dst);
4651   emit_int8((unsigned char)0x85);
4652   emit_operand(dst, src);
4653 }
4654 
4655 void Assembler::tzcntl(Register dst, Register src) {
4656   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4657   emit_int8((unsigned char)0xF3);
4658   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4659   emit_int8(0x0F);
4660   emit_int8((unsigned char)0xBC);
4661   emit_int8((unsigned char)(0xC0 | encode));
4662 }
4663 
4664 void Assembler::tzcntq(Register dst, Register src) {
4665   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4666   emit_int8((unsigned char)0xF3);
4667   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4668   emit_int8(0x0F);
4669   emit_int8((unsigned char)0xBC);
4670   emit_int8((unsigned char)(0xC0 | encode));
4671 }
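
// tzcnt is an F3-prefixed bsf (F3 0F BC /r). On pre-BMI1 hardware the prefix
// is silently ignored and the instruction degrades to bsf, which has
// different semantics for a zero input; hence supports_bmi1() is asserted.
// Sketch:
//
//   __ tzcntl(rax, rbx);   // F3 0F BC C3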
4672 
4673 void Assembler::ucomisd(XMMRegister dst, Address src) {
4674   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4675   InstructionMark im(this);
4676   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4677   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4678   attributes.set_rex_vex_w_reverted();
4679   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4680   emit_int8(0x2E);
4681   emit_operand(dst, src);
4682 }
4683 
4684 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
4685   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4686   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4687   attributes.set_rex_vex_w_reverted();
4688   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4689   emit_int8(0x2E);
4690   emit_int8((unsigned char)(0xC0 | encode));
4691 }
4692 
4693 void Assembler::ucomiss(XMMRegister dst, Address src) {
4694   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4695   InstructionMark im(this);
4696   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4697   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4698   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4699   emit_int8(0x2E);
4700   emit_operand(dst, src);
4701 }
4702 
4703 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
4704   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4705   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4706   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4707   emit_int8(0x2E);
4708   emit_int8((unsigned char)(0xC0 | encode));
4709 }
4710 
4711 void Assembler::xabort(int8_t imm8) {
4712   emit_int8((unsigned char)0xC6);
4713   emit_int8((unsigned char)0xF8);
4714   emit_int8((unsigned char)(imm8 & 0xFF));
4715 }
4716 
4717 void Assembler::xaddb(Address dst, Register src) {
4718   InstructionMark im(this);
4719   prefix(dst, src, true);
4720   emit_int8(0x0F);
4721   emit_int8((unsigned char)0xC0);
4722   emit_operand(src, dst);
4723 }
4724 
4725 void Assembler::xaddw(Address dst, Register src) {
4726   InstructionMark im(this);
4727   emit_int8(0x66);
4728   prefix(dst, src);
4729   emit_int8(0x0F);
4730   emit_int8((unsigned char)0xC1);
4731   emit_operand(src, dst);
4732 }
4733 
4734 void Assembler::xaddl(Address dst, Register src) {
4735   InstructionMark im(this);
4736   prefix(dst, src);
4737   emit_int8(0x0F);
4738   emit_int8((unsigned char)0xC1);
4739   emit_operand(src, dst);
4740 }
4741 
4742 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
4743   InstructionMark im(this);
4744   relocate(rtype);
4745   if (abort.is_bound()) {
4746     address entry = target(abort);
4747     assert(entry != NULL, "abort entry NULL");
4748     intptr_t offset = entry - pc();
4749     emit_int8((unsigned char)0xC7);
4750     emit_int8((unsigned char)0xF8);
4751     emit_int32(offset - 6); // 2 opcode bytes + 4 displacement bytes
4752   } else {
4753     abort.add_patch_at(code(), locator());
4754     emit_int8((unsigned char)0xC7);
4755     emit_int8((unsigned char)0xF8);
4756     emit_int32(0);
4757   }
4758 }
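
// The rel32 of xbegin is relative to the end of the 6-byte instruction
// (C7 F8 plus 4 displacement bytes), hence 'offset - 6' above. For example,
// if the bound abort handler sits 16 bytes past the current pc, then
// entry - pc() == 16 and the emitted displacement is 16 - 6 == 0x0A.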
4759 
4760 void Assembler::xchgb(Register dst, Address src) { // xchg
4761   InstructionMark im(this);
4762   prefix(src, dst, true);
4763   emit_int8((unsigned char)0x86);
4764   emit_operand(dst, src);
4765 }
4766 
4767 void Assembler::xchgw(Register dst, Address src) { // xchg
4768   InstructionMark im(this);
4769   emit_int8(0x66);
4770   prefix(src, dst);
4771   emit_int8((unsigned char)0x87);
4772   emit_operand(dst, src);
4773 }
4774 
4775 void Assembler::xchgl(Register dst, Address src) { // xchg
4776   InstructionMark im(this);
4777   prefix(src, dst);
4778   emit_int8((unsigned char)0x87);
4779   emit_operand(dst, src);
4780 }
4781 
4782 void Assembler::xchgl(Register dst, Register src) {
4783   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4784   emit_int8((unsigned char)0x87);
4785   emit_int8((unsigned char)(0xC0 | encode));
4786 }
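
// xchg with a memory operand (the Address forms above) carries an implicit
// hardware LOCK, so an atomic swap needs no explicit lock() prefix; only the
// register-register form is a plain exchange. Sketch:
//
//   __ xchgl(rax, Address(rsi, 0));   // atomic even without __ lock()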
4787 
4788 void Assembler::xend() {
4789   emit_int8((unsigned char)0x0F);
4790   emit_int8((unsigned char)0x01);
4791   emit_int8((unsigned char)0xD5);
4792 }
4793 
4794 void Assembler::xgetbv() {
4795   emit_int8(0x0F);
4796   emit_int8(0x01);
4797   emit_int8((unsigned char)0xD0);
4798 }
4799 
4800 void Assembler::xorl(Register dst, int32_t imm32) {
4801   prefix(dst);
4802   emit_arith(0x81, 0xF0, dst, imm32);
4803 }
4804 
4805 void Assembler::xorl(Register dst, Address src) {
4806   InstructionMark im(this);
4807   prefix(src, dst);
4808   emit_int8(0x33);
4809   emit_operand(dst, src);
4810 }
4811 
4812 void Assembler::xorl(Register dst, Register src) {
4813   (void) prefix_and_encode(dst->encoding(), src->encoding());
4814   emit_arith(0x33, 0xC0, dst, src);
4815 }
4816 
4817 void Assembler::xorb(Register dst, Address src) {
4818   InstructionMark im(this);
4819   prefix(src, dst);
4820   emit_int8(0x32);
4821   emit_operand(dst, src);
4822 }
4823 
4824 // AVX 3-operand scalar floating-point arithmetic instructions
4825 
4826 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
4827   assert(VM_Version::supports_avx(), "");
4828   InstructionMark im(this);
4829   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4830   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4831   attributes.set_rex_vex_w_reverted();
4832   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4833   emit_int8(0x58);
4834   emit_operand(dst, src);
4835 }
4836 
4837 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4838   assert(VM_Version::supports_avx(), "");
4839   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4840   attributes.set_rex_vex_w_reverted();
4841   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4842   emit_int8(0x58);
4843   emit_int8((unsigned char)(0xC0 | encode));
4844 }
4845 
4846 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
4847   assert(VM_Version::supports_avx(), "");
4848   InstructionMark im(this);
4849   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4850   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4851   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4852   emit_int8(0x58);
4853   emit_operand(dst, src);
4854 }
4855 
4856 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4857   assert(VM_Version::supports_avx(), "");
4858   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4859   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4860   emit_int8(0x58);
4861   emit_int8((unsigned char)(0xC0 | encode));
4862 }
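
// In these three-operand forms nds travels in VEX.vvvv as the first source
// and dst in ModRM.reg, giving the non-destructive dst = nds OP src. On a
// plain AVX machine (no EVEX in play) a representative encoding is (sketch):
//
//   __ vaddsd(xmm1, xmm2, xmm3);   // C5 EB 58 CB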
4863 
4864 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
4865   assert(VM_Version::supports_avx(), "");
4866   InstructionMark im(this);
4867   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4868   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4869   attributes.set_rex_vex_w_reverted();
4870   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4871   emit_int8(0x5E);
4872   emit_operand(dst, src);
4873 }
4874 
4875 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4876   assert(VM_Version::supports_avx(), "");
4877   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4878   attributes.set_rex_vex_w_reverted();
4879   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4880   emit_int8(0x5E);
4881   emit_int8((unsigned char)(0xC0 | encode));
4882 }
4883 
4884 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
4885   assert(VM_Version::supports_avx(), "");
4886   InstructionMark im(this);
4887   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4888   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4889   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4890   emit_int8(0x5E);
4891   emit_operand(dst, src);
4892 }
4893 
4894 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4895   assert(VM_Version::supports_avx(), "");
4896   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4897   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4898   emit_int8(0x5E);
4899   emit_int8((unsigned char)(0xC0 | encode));
4900 }
4901 
4902 void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4903   assert(VM_Version::supports_fma(), "");
4904   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4905   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4906   emit_int8((unsigned char)0xB9);
4907   emit_int8((unsigned char)(0xC0 | encode));
4908 }
4909 
4910 void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4911   assert(VM_Version::supports_fma(), "");
4912   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4913   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4914   emit_int8((unsigned char)0xB9);
4915   emit_int8((unsigned char)(0xC0 | encode));
4916 }
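
// The 231 variants accumulate into dst with a single rounding:
// dst = src1 * src2 + dst, the natural dot-product/accumulation step.
// Sketch, register names arbitrary:
//
//   __ vfmadd231sd(acc, a, b);   // acc += a * b, fused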
4917 
4918 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
4919   assert(VM_Version::supports_avx(), "");
4920   InstructionMark im(this);
4921   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4922   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4923   attributes.set_rex_vex_w_reverted();
4924   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4925   emit_int8(0x59);
4926   emit_operand(dst, src);
4927 }
4928 
4929 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4930   assert(VM_Version::supports_avx(), "");
4931   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4932   attributes.set_rex_vex_w_reverted();
4933   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4934   emit_int8(0x59);
4935   emit_int8((unsigned char)(0xC0 | encode));
4936 }
4937 
4938 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
4939   assert(VM_Version::supports_avx(), "");
4940   InstructionMark im(this);
4941   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4942   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4943   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4944   emit_int8(0x59);
4945   emit_operand(dst, src);
4946 }
4947 
4948 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4949   assert(VM_Version::supports_avx(), "");
4950   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4951   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4952   emit_int8(0x59);
4953   emit_int8((unsigned char)(0xC0 | encode));
4954 }
4955 
4956 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
4957   assert(VM_Version::supports_avx(), "");
4958   InstructionMark im(this);
4959   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4960   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4961   attributes.set_rex_vex_w_reverted();
4962   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4963   emit_int8(0x5C);
4964   emit_operand(dst, src);
4965 }
4966 
4967 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4968   assert(VM_Version::supports_avx(), "");
4969   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4970   attributes.set_rex_vex_w_reverted();
4971   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4972   emit_int8(0x5C);
4973   emit_int8((unsigned char)(0xC0 | encode));
4974 }
4975 
4976 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
4977   assert(VM_Version::supports_avx(), "");
4978   InstructionMark im(this);
4979   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4980   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4981   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4982   emit_int8(0x5C);
4983   emit_operand(dst, src);
4984 }
4985 
4986 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4987   assert(VM_Version::supports_avx(), "");
4988   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4989   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4990   emit_int8(0x5C);
4991   emit_int8((unsigned char)(0xC0 | encode));
4992 }
4993 
4994 //====================VECTOR ARITHMETIC=====================================
4995 
4996 // Floating-point vector arithmetic
4997 
4998 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
4999   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5000   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5001   attributes.set_rex_vex_w_reverted();
5002   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5003   emit_int8(0x58);
5004   emit_int8((unsigned char)(0xC0 | encode));
5005 }
5006 
5007 void Assembler::addpd(XMMRegister dst, Address src) {
5008   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5009   InstructionMark im(this);
5010   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5011   attributes.set_rex_vex_w_reverted();
5012   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5013   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5014   emit_int8(0x58);
5015   emit_operand(dst, src);
5016 }
5017 
5018 
5019 void Assembler::addps(XMMRegister dst, XMMRegister src) {
5020   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5021   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5022   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5023   emit_int8(0x58);
5024   emit_int8((unsigned char)(0xC0 | encode));
5025 }
5026 
5027 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5028   assert(VM_Version::supports_avx(), "");
5029   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5030   attributes.set_rex_vex_w_reverted();
5031   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5032   emit_int8(0x58);
5033   emit_int8((unsigned char)(0xC0 | encode));
5034 }
5035 
5036 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5037   assert(VM_Version::supports_avx(), "");
5038   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5039   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5040   emit_int8(0x58);
5041   emit_int8((unsigned char)(0xC0 | encode));
5042 }
5043 
5044 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5045   assert(VM_Version::supports_avx(), "");
5046   InstructionMark im(this);
5047   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5048   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5049   attributes.set_rex_vex_w_reverted();
5050   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5051   emit_int8(0x58);
5052   emit_operand(dst, src);
5053 }
5054 
5055 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5056   assert(VM_Version::supports_avx(), "");
5057   InstructionMark im(this);
5058   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5059   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5060   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5061   emit_int8(0x58);
5062   emit_operand(dst, src);
5063 }
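
// When the Address forms above are EVEX-encoded, the tuple type passed to
// set_address_attributes() drives disp8*N compression: an 8-bit displacement
// is implicitly scaled by the operand's tuple size. A full-vector (EVEX_FV)
// 512-bit access at [rax + 128], for instance, stores disp8 == 2 (128 / 64)
// rather than a 4-byte displacement.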
5064 
5065 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
5066   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5067   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5068   attributes.set_rex_vex_w_reverted();
5069   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5070   emit_int8(0x5C);
5071   emit_int8((unsigned char)(0xC0 | encode));
5072 }
5073 
5074 void Assembler::subps(XMMRegister dst, XMMRegister src) {
5075   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5076   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5077   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5078   emit_int8(0x5C);
5079   emit_int8((unsigned char)(0xC0 | encode));
5080 }
5081 
5082 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5083   assert(VM_Version::supports_avx(), "");
5084   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5085   attributes.set_rex_vex_w_reverted();
5086   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5087   emit_int8(0x5C);
5088   emit_int8((unsigned char)(0xC0 | encode));
5089 }
5090 
5091 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5092   assert(VM_Version::supports_avx(), "");
5093   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5094   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5095   emit_int8(0x5C);
5096   emit_int8((unsigned char)(0xC0 | encode));
5097 }
5098 
5099 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5100   assert(VM_Version::supports_avx(), "");
5101   InstructionMark im(this);
5102   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5103   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5104   attributes.set_rex_vex_w_reverted();
5105   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5106   emit_int8(0x5C);
5107   emit_operand(dst, src);
5108 }
5109 
5110 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5111   assert(VM_Version::supports_avx(), "");
5112   InstructionMark im(this);
5113   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5114   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5115   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5116   emit_int8(0x5C);
5117   emit_operand(dst, src);
5118 }
5119 
5120 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
5121   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5122   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5123   attributes.set_rex_vex_w_reverted();
5124   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5125   emit_int8(0x59);
5126   emit_int8((unsigned char)(0xC0 | encode));
5127 }
5128 
5129 void Assembler::mulpd(XMMRegister dst, Address src) {
5130   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5131   InstructionMark im(this);
5132   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5133   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5134   attributes.set_rex_vex_w_reverted();
5135   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5136   emit_int8(0x59);
5137   emit_operand(dst, src);
5138 }
5139 
5140 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
5141   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5142   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5143   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5144   emit_int8(0x59);
5145   emit_int8((unsigned char)(0xC0 | encode));
5146 }
5147 
5148 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5149   assert(VM_Version::supports_avx(), "");
5150   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5151   attributes.set_rex_vex_w_reverted();
5152   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5153   emit_int8(0x59);
5154   emit_int8((unsigned char)(0xC0 | encode));
5155 }
5156 
5157 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5158   assert(VM_Version::supports_avx(), "");
5159   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5160   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5161   emit_int8(0x59);
5162   emit_int8((unsigned char)(0xC0 | encode));
5163 }
5164 
5165 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5166   assert(VM_Version::supports_avx(), "");
5167   InstructionMark im(this);
5168   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5169   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5170   attributes.set_rex_vex_w_reverted();
5171   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5172   emit_int8(0x59);
5173   emit_operand(dst, src);
5174 }
5175 
5176 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5177   assert(VM_Version::supports_avx(), "");
5178   InstructionMark im(this);
5179   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5180   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5181   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5182   emit_int8(0x59);
5183   emit_operand(dst, src);
5184 }
5185 
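     // The FMA "231" forms compute dst = src1 * src2 + dst, i.e. operands
     // two and three are multiplied and operand one is the accumulator.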
5186 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5187   assert(VM_Version::supports_fma(), "");
5188   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5189   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5190   emit_int8((unsigned char)0xB8);
5191   emit_int8((unsigned char)(0xC0 | encode));
5192 }
5193 
5194 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5195   assert(VM_Version::supports_fma(), "");
5196   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5197   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5198   emit_int8((unsigned char)0xB8);
5199   emit_int8((unsigned char)(0xC0 | encode));
5200 }
5201 
5202 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5203   assert(VM_Version::supports_fma(), "");
5204   InstructionMark im(this);
5205   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5206   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5207   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5208   emit_int8((unsigned char)0xB8);
5209   emit_operand(dst, src2);
5210 }
5211 
5212 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5213   assert(VM_Version::supports_fma(), "");
5214   InstructionMark im(this);
5215   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5216   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5217   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5218   emit_int8((unsigned char)0xB8);
5219   emit_operand(dst, src2);
5220 }
5221 
5222 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
5223   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5224   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5225   attributes.set_rex_vex_w_reverted();
5226   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5227   emit_int8(0x5E);
5228   emit_int8((unsigned char)(0xC0 | encode));
5229 }
5230 
5231 void Assembler::divps(XMMRegister dst, XMMRegister src) {
5232   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5233   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5234   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5235   emit_int8(0x5E);
5236   emit_int8((unsigned char)(0xC0 | encode));
5237 }
5238 
5239 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5240   assert(VM_Version::supports_avx(), "");
5241   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5242   attributes.set_rex_vex_w_reverted();
5243   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5244   emit_int8(0x5E);
5245   emit_int8((unsigned char)(0xC0 | encode));
5246 }
5247 
5248 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5249   assert(VM_Version::supports_avx(), "");
5250   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5251   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5252   emit_int8(0x5E);
5253   emit_int8((unsigned char)(0xC0 | encode));
5254 }
5255 
5256 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5257   assert(VM_Version::supports_avx(), "");
5258   InstructionMark im(this);
5259   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5260   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5261   attributes.set_rex_vex_w_reverted();
5262   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5263   emit_int8(0x5E);
5264   emit_operand(dst, src);
5265 }
5266 
5267 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5268   assert(VM_Version::supports_avx(), "");
5269   InstructionMark im(this);
5270   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5271   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5272   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5273   emit_int8(0x5E);
5274   emit_operand(dst, src);
5275 }
5276 
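     // Square root is unary, so 0 is passed where an nds register encoding
     // would go; the prefix helpers translate that into the required unused
     // (all-ones) VEX.vvvv field.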
5277 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
5278   assert(VM_Version::supports_avx(), "");
5279   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5280   attributes.set_rex_vex_w_reverted();
5281   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5282   emit_int8(0x51);
5283   emit_int8((unsigned char)(0xC0 | encode));
5284 }
5285 
5286 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
5287   assert(VM_Version::supports_avx(), "");
5288   InstructionMark im(this);
5289   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5290   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5291   attributes.set_rex_vex_w_reverted();
5292   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5293   emit_int8(0x51);
5294   emit_operand(dst, src);
5295 }
5296 
5297 void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
5298   assert(VM_Version::supports_avx(), "");
5299   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5300   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5301   emit_int8(0x51);
5302   emit_int8((unsigned char)(0xC0 | encode));
5303 }
5304 
5305 void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
5306   assert(VM_Version::supports_avx(), "");
5307   InstructionMark im(this);
5308   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5309   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5310   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5311   emit_int8(0x51);
5312   emit_operand(dst, src);
5313 }
5314 
5315 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
5316   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5317   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5318   attributes.set_rex_vex_w_reverted();
5319   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5320   emit_int8(0x54);
5321   emit_int8((unsigned char)(0xC0 | encode));
5322 }
5323 
5324 void Assembler::andps(XMMRegister dst, XMMRegister src) {
5325   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5326   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5327   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5328   emit_int8(0x54);
5329   emit_int8((unsigned char)(0xC0 | encode));
5330 }
5331 
5332 void Assembler::andps(XMMRegister dst, Address src) {
5333   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5334   InstructionMark im(this);
5335   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5336   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5337   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5338   emit_int8(0x54);
5339   emit_operand(dst, src);
5340 }
5341 
5342 void Assembler::andpd(XMMRegister dst, Address src) {
5343   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5344   InstructionMark im(this);
5345   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5346   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5347   attributes.set_rex_vex_w_reverted();
5348   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5349   emit_int8(0x54);
5350   emit_operand(dst, src);
5351 }
5352 
5353 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5354   assert(VM_Version::supports_avx(), "");
5355   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5356   attributes.set_rex_vex_w_reverted();
5357   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5358   emit_int8(0x54);
5359   emit_int8((unsigned char)(0xC0 | encode));
5360 }
5361 
5362 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5363   assert(VM_Version::supports_avx(), "");
5364   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5365   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5366   emit_int8(0x54);
5367   emit_int8((unsigned char)(0xC0 | encode));
5368 }
5369 
5370 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5371   assert(VM_Version::supports_avx(), "");
5372   InstructionMark im(this);
5373   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5374   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5375   attributes.set_rex_vex_w_reverted();
5376   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5377   emit_int8(0x54);
5378   emit_operand(dst, src);
5379 }
5380 
5381 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5382   assert(VM_Version::supports_avx(), "");
5383   InstructionMark im(this);
5384   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5385   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5386   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5387   emit_int8(0x54);
5388   emit_operand(dst, src);
5389 }
5390 
5391 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
5392   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5393   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5394   attributes.set_rex_vex_w_reverted();
5395   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5396   emit_int8(0x15);
5397   emit_int8((unsigned char)(0xC0 | encode));
5398 }
5399 
5400 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
5401   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5402   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5403   attributes.set_rex_vex_w_reverted();
5404   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5405   emit_int8(0x14);
5406   emit_int8((unsigned char)(0xC0 | encode));
5407 }
5408 
5409 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
5410   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5411   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5412   attributes.set_rex_vex_w_reverted();
5413   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5414   emit_int8(0x57);
5415   emit_int8((unsigned char)(0xC0 | encode));
5416 }
5417 
5418 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
5419   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5420   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5421   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5422   emit_int8(0x57);
5423   emit_int8((unsigned char)(0xC0 | encode));
5424 }
5425 
5426 void Assembler::xorpd(XMMRegister dst, Address src) {
5427   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5428   InstructionMark im(this);
5429   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5430   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5431   attributes.set_rex_vex_w_reverted();
5432   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5433   emit_int8(0x57);
5434   emit_operand(dst, src);
5435 }
5436 
5437 void Assembler::xorps(XMMRegister dst, Address src) {
5438   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5439   InstructionMark im(this);
5440   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5441   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5442   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5443   emit_int8(0x57);
5444   emit_operand(dst, src);
5445 }
5446 
5447 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5448   assert(VM_Version::supports_avx(), "");
5449   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5450   attributes.set_rex_vex_w_reverted();
5451   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5452   emit_int8(0x57);
5453   emit_int8((unsigned char)(0xC0 | encode));
5454 }
5455 
5456 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5457   assert(VM_Version::supports_avx(), "");
5458   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5459   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5460   emit_int8(0x57);
5461   emit_int8((unsigned char)(0xC0 | encode));
5462 }
5463 
5464 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5465   assert(VM_Version::supports_avx(), "");
5466   InstructionMark im(this);
5467   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5468   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5469   attributes.set_rex_vex_w_reverted();
5470   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5471   emit_int8(0x57);
5472   emit_operand(dst, src);
5473 }
5474 
5475 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5476   assert(VM_Version::supports_avx(), "");
5477   InstructionMark im(this);
5478   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5479   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5480   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5481   emit_int8(0x57);
5482   emit_operand(dst, src);
5483 }
5484 
5485 // Integer vector arithmetic
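     // The two-operand SSE forms below use dst as both a source and the
     // destination, while the three-operand VEX/EVEX "v" forms take an
     // explicit first source (nds), so their destination need not be an input.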
5486 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5487   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
5488          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
5489   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5490   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5491   emit_int8(0x01);
5492   emit_int8((unsigned char)(0xC0 | encode));
5493 }
5494 
5495 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5496   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
5497          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
5498   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5499   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5500   emit_int8(0x02);
5501   emit_int8((unsigned char)(0xC0 | encode));
5502 }
5503 
5504 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
5505   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5506   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5507   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5508   emit_int8((unsigned char)0xFC);
5509   emit_int8((unsigned char)(0xC0 | encode));
5510 }
5511 
5512 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
5513   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5514   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5515   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5516   emit_int8((unsigned char)0xFD);
5517   emit_int8((unsigned char)(0xC0 | encode));
5518 }
5519 
5520 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
5521   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5522   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5523   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5524   emit_int8((unsigned char)0xFE);
5525   emit_int8((unsigned char)(0xC0 | encode));
5526 }
5527 
5528 void Assembler::paddd(XMMRegister dst, Address src) {
5529   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5530   InstructionMark im(this);
5531   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5532   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5533   emit_int8((unsigned char)0xFE);
5534   emit_operand(dst, src);
5535 }
5536 
5537 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
5538   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5539   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5540   attributes.set_rex_vex_w_reverted();
5541   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5542   emit_int8((unsigned char)0xD4);
5543   emit_int8((unsigned char)(0xC0 | encode));
5544 }
5545 
5546 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
5547   assert(VM_Version::supports_ssse3(), "");
5548   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5549   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5550   emit_int8(0x01);
5551   emit_int8((unsigned char)(0xC0 | encode));
5552 }
5553 
5554 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
5555   assert(VM_Version::supports_ssse3(), "");
5556   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5557   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5558   emit_int8(0x02);
5559   emit_int8((unsigned char)(0xC0 | encode));
5560 }
5561 
5562 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5563   assert(UseAVX > 0, "requires some form of AVX");
5564   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5565   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5566   emit_int8((unsigned char)0xFC);
5567   emit_int8((unsigned char)(0xC0 | encode));
5568 }
5569 
5570 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5571   assert(UseAVX > 0, "requires some form of AVX");
5572   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5573   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5574   emit_int8((unsigned char)0xFD);
5575   emit_int8((unsigned char)(0xC0 | encode));
5576 }
5577 
5578 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5579   assert(UseAVX > 0, "requires some form of AVX");
5580   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5581   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5582   emit_int8((unsigned char)0xFE);
5583   emit_int8((unsigned char)(0xC0 | encode));
5584 }
5585 
5586 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5587   assert(UseAVX > 0, "requires some form of AVX");
5588   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5589   attributes.set_rex_vex_w_reverted();
5590   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5591   emit_int8((unsigned char)0xD4);
5592   emit_int8((unsigned char)(0xC0 | encode));
5593 }
5594 
5595 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5596   assert(UseAVX > 0, "requires some form of AVX");
5597   InstructionMark im(this);
5598   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5599   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5600   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5601   emit_int8((unsigned char)0xFC);
5602   emit_operand(dst, src);
5603 }
5604 
5605 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5606   assert(UseAVX > 0, "requires some form of AVX");
5607   InstructionMark im(this);
5608   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5609   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5610   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5611   emit_int8((unsigned char)0xFD);
5612   emit_operand(dst, src);
5613 }
5614 
5615 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5616   assert(UseAVX > 0, "requires some form of AVX");
5617   InstructionMark im(this);
5618   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5619   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5620   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5621   emit_int8((unsigned char)0xFE);
5622   emit_operand(dst, src);
5623 }
5624 
5625 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5626   assert(UseAVX > 0, "requires some form of AVX");
5627   InstructionMark im(this);
5628   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5629   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5630   attributes.set_rex_vex_w_reverted();
5631   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5632   emit_int8((unsigned char)0xD4);
5633   emit_operand(dst, src);
5634 }
5635 
5636 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
5637   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5638   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5639   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5640   emit_int8((unsigned char)0xF8);
5641   emit_int8((unsigned char)(0xC0 | encode));
5642 }
5643 
5644 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
5645   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5646   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5647   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5648   emit_int8((unsigned char)0xF9);
5649   emit_int8((unsigned char)(0xC0 | encode));
5650 }
5651 
5652 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
       NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5653   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5654   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5655   emit_int8((unsigned char)0xFA);
5656   emit_int8((unsigned char)(0xC0 | encode));
5657 }
5658 
5659 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
5660   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5661   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5662   attributes.set_rex_vex_w_reverted();
5663   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5664   emit_int8((unsigned char)0xFB);
5665   emit_int8((unsigned char)(0xC0 | encode));
5666 }
5667 
5668 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5669   assert(UseAVX > 0, "requires some form of AVX");
5670   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5671   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5672   emit_int8((unsigned char)0xF8);
5673   emit_int8((unsigned char)(0xC0 | encode));
5674 }
5675 
5676 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5677   assert(UseAVX > 0, "requires some form of AVX");
5678   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5679   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5680   emit_int8((unsigned char)0xF9);
5681   emit_int8((unsigned char)(0xC0 | encode));
5682 }
5683 
5684 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5685   assert(UseAVX > 0, "requires some form of AVX");
5686   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5687   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5688   emit_int8((unsigned char)0xFA);
5689   emit_int8((unsigned char)(0xC0 | encode));
5690 }
5691 
5692 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5693   assert(UseAVX > 0, "requires some form of AVX");
5694   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5695   attributes.set_rex_vex_w_reverted();
5696   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5697   emit_int8((unsigned char)0xFB);
5698   emit_int8((unsigned char)(0xC0 | encode));
5699 }
5700 
5701 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5702   assert(UseAVX > 0, "requires some form of AVX");
5703   InstructionMark im(this);
5704   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5705   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5706   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5707   emit_int8((unsigned char)0xF8);
5708   emit_operand(dst, src);
5709 }
5710 
5711 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5712   assert(UseAVX > 0, "requires some form of AVX");
5713   InstructionMark im(this);
5714   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5715   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5716   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5717   emit_int8((unsigned char)0xF9);
5718   emit_operand(dst, src);
5719 }
5720 
5721 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5722   assert(UseAVX > 0, "requires some form of AVX");
5723   InstructionMark im(this);
5724   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5725   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5726   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5727   emit_int8((unsigned char)0xFA);
5728   emit_operand(dst, src);
5729 }
5730 
5731 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5732   assert(UseAVX > 0, "requires some form of AVX");
5733   InstructionMark im(this);
5734   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5735   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5736   attributes.set_rex_vex_w_reverted();
5737   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5738   emit_int8((unsigned char)0xFB);
5739   emit_operand(dst, src);
5740 }
5741 
5742 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
5743   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5744   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5745   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5746   emit_int8((unsigned char)0xD5);
5747   emit_int8((unsigned char)(0xC0 | encode));
5748 }
5749 
5750 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
5751   assert(VM_Version::supports_sse4_1(), "");
5752   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5753   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5754   emit_int8(0x40);
5755   emit_int8((unsigned char)(0xC0 | encode));
5756 }
5757 
5758 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5759   assert(UseAVX > 0, "requires some form of AVX");
5760   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5761   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5762   emit_int8((unsigned char)0xD5);
5763   emit_int8((unsigned char)(0xC0 | encode));
5764 }
5765 
5766 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5767   assert(UseAVX > 0, "requires some form of AVX");
5768   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5769   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5770   emit_int8(0x40);
5771   emit_int8((unsigned char)(0xC0 | encode));
5772 }
5773 
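     // vpmullq is an AVX-512DQ instruction; hence the UseAVX > 2 guard and
     // the EVEX-only encoding (set_is_evex_instruction) below.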
5774 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5775   assert(UseAVX > 2, "requires some form of EVEX");
5776   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5777   attributes.set_is_evex_instruction();
5778   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5779   emit_int8(0x40);
5780   emit_int8((unsigned char)(0xC0 | encode));
5781 }
5782 
5783 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5784   assert(UseAVX > 0, "requires some form of AVX");
5785   InstructionMark im(this);
5786   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5787   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5788   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5789   emit_int8((unsigned char)0xD5);
5790   emit_operand(dst, src);
5791 }
5792 
5793 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5794   assert(UseAVX > 0, "requires some form of AVX");
5795   InstructionMark im(this);
5796   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5797   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5798   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5799   emit_int8(0x40);
5800   emit_operand(dst, src);
5801 }
5802 
5803 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5804   assert(UseAVX > 2, "requires some form of EVEX");
5805   InstructionMark im(this);
5806   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5807   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5808   attributes.set_is_evex_instruction();
5809   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5810   emit_int8(0x40);
5811   emit_operand(dst, src);
5812 }
5813 
5814 // Shift packed integers left by the specified number of bits.
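     // The immediate forms have no second register operand: the ModRM reg
     // field holds an opcode extension instead, which is why an aliasing
     // xmm register (xmm6 here, xmm2 and xmm4 for the right shifts below)
     // is passed as the first argument to the prefix helpers. For example,
     // psllw(xmm1, 3) emits 66 0F 71 F1 03, where 0xF1 = 0xC0 | (6 << 3) | 1.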
5815 void Assembler::psllw(XMMRegister dst, int shift) {
5816   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5817   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5818   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5819   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5820   emit_int8(0x71);
5821   emit_int8((unsigned char)(0xC0 | encode));
5822   emit_int8(shift & 0xFF);
5823 }
5824 
5825 void Assembler::pslld(XMMRegister dst, int shift) {
5826   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5827   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5828   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5829   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5830   emit_int8(0x72);
5831   emit_int8((unsigned char)(0xC0 | encode));
5832   emit_int8(shift & 0xFF);
5833 }
5834 
5835 void Assembler::psllq(XMMRegister dst, int shift) {
5836   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5837   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5838   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5839   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5840   emit_int8(0x73);
5841   emit_int8((unsigned char)(0xC0 | encode));
5842   emit_int8(shift & 0xFF);
5843 }
5844 
5845 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
5846   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5847   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5848   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5849   emit_int8((unsigned char)0xF1);
5850   emit_int8((unsigned char)(0xC0 | encode));
5851 }
5852 
5853 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
5854   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5855   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5856   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5857   emit_int8((unsigned char)0xF2);
5858   emit_int8((unsigned char)(0xC0 | encode));
5859 }
5860 
5861 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
5862   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5863   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5864   attributes.set_rex_vex_w_reverted();
5865   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5866   emit_int8((unsigned char)0xF3);
5867   emit_int8((unsigned char)(0xC0 | encode));
5868 }
5869 
5870 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5871   assert(UseAVX > 0, "requires some form of AVX");
5872   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5873   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5874   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5875   emit_int8(0x71);
5876   emit_int8((unsigned char)(0xC0 | encode));
5877   emit_int8(shift & 0xFF);
5878 }
5879 
5880 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5881   assert(UseAVX > 0, "requires some form of AVX");
5883   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5884   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5885   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5886   emit_int8(0x72);
5887   emit_int8((unsigned char)(0xC0 | encode));
5888   emit_int8(shift & 0xFF);
5889 }
5890 
5891 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5892   assert(UseAVX > 0, "requires some form of AVX");
5893   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5894   attributes.set_rex_vex_w_reverted();
5895   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5896   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5897   emit_int8(0x73);
5898   emit_int8((unsigned char)(0xC0 | encode));
5899   emit_int8(shift & 0xFF);
5900 }
5901 
5902 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5903   assert(UseAVX > 0, "requires some form of AVX");
5904   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5905   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5906   emit_int8((unsigned char)0xF1);
5907   emit_int8((unsigned char)(0xC0 | encode));
5908 }
5909 
5910 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5911   assert(UseAVX > 0, "requires some form of AVX");
5912   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5913   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5914   emit_int8((unsigned char)0xF2);
5915   emit_int8((unsigned char)(0xC0 | encode));
5916 }
5917 
5918 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5919   assert(UseAVX > 0, "requires some form of AVX");
5920   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5921   attributes.set_rex_vex_w_reverted();
5922   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5923   emit_int8((unsigned char)0xF3);
5924   emit_int8((unsigned char)(0xC0 | encode));
5925 }
5926 
5927 // Shift packed integers logically right by the specified number of bits.
5928 void Assembler::psrlw(XMMRegister dst, int shift) {
5929   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5930   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5931   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
5932   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5933   emit_int8(0x71);
5934   emit_int8((unsigned char)(0xC0 | encode));
5935   emit_int8(shift & 0xFF);
5936 }
5937 
5938 void Assembler::psrld(XMMRegister dst, int shift) {
5939   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5940   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5941   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
5942   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5943   emit_int8(0x72);
5944   emit_int8((unsigned char)(0xC0 | encode));
5945   emit_int8(shift & 0xFF);
5946 }
5947 
5948 void Assembler::psrlq(XMMRegister dst, int shift) {
5949   // Do not confuse this with the SSE2 instruction psrldq, which shifts
5950   // the whole 128-bit xmm register right by a number of bytes, not bits.
5951   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5952   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5953   attributes.set_rex_vex_w_reverted();
5954   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
5955   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5956   emit_int8(0x73);
5957   emit_int8((unsigned char)(0xC0 | encode));
5958   emit_int8(shift & 0xFF);
5959 }
5960 
5961 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
5962   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5963   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5964   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5965   emit_int8((unsigned char)0xD1);
5966   emit_int8((unsigned char)(0xC0 | encode));
5967 }
5968 
5969 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
5970   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5971   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5972   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5973   emit_int8((unsigned char)0xD2);
5974   emit_int8((unsigned char)(0xC0 | encode));
5975 }
5976 
5977 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
5978   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5979   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5980   attributes.set_rex_vex_w_reverted();
5981   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5982   emit_int8((unsigned char)0xD3);
5983   emit_int8((unsigned char)(0xC0 | encode));
5984 }
5985 
5986 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5987   assert(UseAVX > 0, "requires some form of AVX");
5988   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5989   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
5990   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5991   emit_int8(0x71);
5992   emit_int8((unsigned char)(0xC0 | encode));
5993   emit_int8(shift & 0xFF);
5994 }
5995 
5996 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5997   assert(UseAVX > 0, "requires some form of AVX");
5998   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5999   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
6000   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6001   emit_int8(0x72);
6002   emit_int8((unsigned char)(0xC0 | encode));
6003   emit_int8(shift & 0xFF);
6004 }
6005 
6006 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6007   assert(UseAVX > 0, "requires some form of AVX");
6008   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6009   attributes.set_rex_vex_w_reverted();
6010   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
6011   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6012   emit_int8(0x73);
6013   emit_int8((unsigned char)(0xC0 | encode));
6014   emit_int8(shift & 0xFF);
6015 }
6016 
6017 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6018   assert(UseAVX > 0, "requires some form of AVX");
6019   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6020   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6021   emit_int8((unsigned char)0xD1);
6022   emit_int8((unsigned char)(0xC0 | encode));
6023 }
6024 
6025 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6026   assert(UseAVX > 0, "requires some form of AVX");
6027   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6028   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6029   emit_int8((unsigned char)0xD2);
6030   emit_int8((unsigned char)(0xC0 | encode));
6031 }
6032 
6033 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6034   assert(UseAVX > 0, "requires some form of AVX");
6035   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6036   attributes.set_rex_vex_w_reverted();
6037   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6038   emit_int8((unsigned char)0xD3);
6039   emit_int8((unsigned char)(0xC0 | encode));
6040 }
6041 
6042 // Shift packed integers arithmetically right by the specified number of bits.
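     // Note the absence of a quadword form: SSE/AVX define no psraq; packed
     // 64-bit arithmetic right shifts (vpsraq) exist only in AVX-512.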
6043 void Assembler::psraw(XMMRegister dst, int shift) {
6044   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6045   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6046   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6047   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6048   emit_int8(0x71);
6049   emit_int8((unsigned char)(0xC0 | encode));
6050   emit_int8(shift & 0xFF);
6051 }
6052 
6053 void Assembler::psrad(XMMRegister dst, int shift) {
6054   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6055   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6056   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6057   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6058   emit_int8(0x72);
6059   emit_int8((unsigned char)(0xC0 | encode));
6060   emit_int8(shift & 0xFF);
6061 }
6062 
6063 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
6064   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6065   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6066   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6067   emit_int8((unsigned char)0xE1);
6068   emit_int8((unsigned char)(0xC0 | encode));
6069 }
6070 
6071 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
6072   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6073   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6074   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6075   emit_int8((unsigned char)0xE2);
6076   emit_int8((unsigned char)(0xC0 | encode));
6077 }
6078 
6079 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6080   assert(UseAVX > 0, "requires some form of AVX");
6081   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6082   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6083   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6084   emit_int8(0x71);
6085   emit_int8((unsigned char)(0xC0 | encode));
6086   emit_int8(shift & 0xFF);
6087 }
6088 
6089 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6090   assert(UseAVX > 0, "requires some form of AVX");
6091   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6092   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6093   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6094   emit_int8(0x72);
6095   emit_int8((unsigned char)(0xC0 | encode));
6096   emit_int8(shift & 0xFF);
6097 }
6098 
6099 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6100   assert(UseAVX > 0, "requires some form of AVX");
6101   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6102   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6103   emit_int8((unsigned char)0xE1);
6104   emit_int8((unsigned char)(0xC0 | encode));
6105 }
6106 
6107 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6108   assert(UseAVX > 0, "requires some form of AVX");
6109   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6110   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6111   emit_int8((unsigned char)0xE2);
6112   emit_int8((unsigned char)(0xC0 | encode));
6113 }
6114 
6115 
6116 // Logical operations on packed integers
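     // These are bitwise over the whole register, so they are element-size
     // agnostic; they differ from the andps/andpd/xorps/xorpd forms above
     // mainly in encoding and (on many CPUs) execution domain.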
6117 void Assembler::pand(XMMRegister dst, XMMRegister src) {
6118   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6119   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6120   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6121   emit_int8((unsigned char)0xDB);
6122   emit_int8((unsigned char)(0xC0 | encode));
6123 }
6124 
6125 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6126   assert(UseAVX > 0, "requires some form of AVX");
6127   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6128   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6129   emit_int8((unsigned char)0xDB);
6130   emit_int8((unsigned char)(0xC0 | encode));
6131 }
6132 
6133 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6134   assert(UseAVX > 0, "requires some form of AVX");
6135   InstructionMark im(this);
6136   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6137   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6138   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6139   emit_int8((unsigned char)0xDB);
6140   emit_operand(dst, src);
6141 }
6142 
6143 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
6144   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6145   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6146   attributes.set_rex_vex_w_reverted();
6147   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6148   emit_int8((unsigned char)0xDF);
6149   emit_int8((unsigned char)(0xC0 | encode));
6150 }
6151 
6152 void Assembler::por(XMMRegister dst, XMMRegister src) {
6153   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6154   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6155   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6156   emit_int8((unsigned char)0xEB);
6157   emit_int8((unsigned char)(0xC0 | encode));
6158 }
6159 
6160 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6161   assert(UseAVX > 0, "requires some form of AVX");
6162   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6163   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6164   emit_int8((unsigned char)0xEB);
6165   emit_int8((unsigned char)(0xC0 | encode));
6166 }
6167 
6168 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6169   assert(UseAVX > 0, "requires some form of AVX");
6170   InstructionMark im(this);
6171   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6172   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6173   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6174   emit_int8((unsigned char)0xEB);
6175   emit_operand(dst, src);
6176 }
6177 
6178 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
6179   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6180   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6181   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6182   emit_int8((unsigned char)0xEF);
6183   emit_int8((unsigned char)(0xC0 | encode));
6184 }
6185 
6186 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6187   assert(UseAVX > 0, "requires some form of AVX");
6188   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6189   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6190   emit_int8((unsigned char)0xEF);
6191   emit_int8((unsigned char)(0xC0 | encode));
6192 }
6193 
6194 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6195   assert(UseAVX > 0, "requires some form of AVX");
6196   InstructionMark im(this);
6197   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6198   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6199   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6200   emit_int8((unsigned char)0xEF);
6201   emit_operand(dst, src);
6202 }
6203 
6204 
6205 // vinserti forms
6206 
6207 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6208   assert(VM_Version::supports_avx2(), "");
6209   assert(imm8 <= 0x01, "imm8: %u", imm8);
6210   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6211   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6212   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6213   emit_int8(0x38);
6214   emit_int8((unsigned char)(0xC0 | encode));
6215   // 0x00 - insert into lower 128 bits
6216   // 0x01 - insert into upper 128 bits
6217   emit_int8(imm8 & 0x01);
6218 }
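
     // Usage sketch (illustrative registers): vinserti128(ymm0, ymm1, xmm2, 1)
     // copies nds (ymm1) into dst and then replaces the upper 128 bits with
     // src (xmm2); imm8 = 0 would replace the lower half instead.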
6219 
6220 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6221   assert(VM_Version::supports_avx2(), "");
6222   assert(dst != xnoreg, "sanity");
6223   assert(imm8 <= 0x01, "imm8: %u", imm8);
6224   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6225   InstructionMark im(this);
6226   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6227   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6228   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6229   emit_int8(0x38);
6230   emit_operand(dst, src);
6231   // 0x00 - insert into lower 128 bits
6232   // 0x01 - insert into upper 128 bits
6233   emit_int8(imm8 & 0x01);
6234 }
6235 
6236 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6237   assert(VM_Version::supports_evex(), "");
6238   assert(imm8 <= 0x03, "imm8: %u", imm8);
6239   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6240   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6241   emit_int8(0x38);
6242   emit_int8((unsigned char)(0xC0 | encode));
6243   // 0x00 - insert into q0 128 bits (0..127)
6244   // 0x01 - insert into q1 128 bits (128..255)
6245   // 0x02 - insert into q2 128 bits (256..383)
6246   // 0x03 - insert into q3 128 bits (384..511)
6247   emit_int8(imm8 & 0x03);
6248 }
6249 
6250 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6251   assert(VM_Version::supports_avx2(), "");
6252   assert(dst != xnoreg, "sanity");
6253   assert(imm8 <= 0x03, "imm8: %u", imm8);
6254   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6255   InstructionMark im(this);
6256   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6257   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6258   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6259   emit_int8(0x38);
6260   emit_operand(dst, src);
6261   // 0x00 - insert into q0 128 bits (0..127)
6262   // 0x01 - insert into q1 128 bits (128..255)
6263   // 0x02 - insert into q2 128 bits (256..383)
6264   // 0x03 - insert into q3 128 bits (384..511)
6265   emit_int8(imm8 & 0x03);
6266 }
6267 
6268 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6269   assert(VM_Version::supports_evex(), "");
6270   assert(imm8 <= 0x01, "imm8: %u", imm8);
6271   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6272   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6273   emit_int8(0x38);
6274   emit_int8((unsigned char)(0xC0 | encode));
6275   // 0x00 - insert into lower 256 bits
6276   // 0x01 - insert into upper 256 bits
6277   emit_int8(imm8 & 0x01);
6278 }
6279 
6280 
6281 // vinsertf forms
6282 
6283 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6284   assert(VM_Version::supports_avx(), "");
6285   assert(imm8 <= 0x01, "imm8: %u", imm8);
6286   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6287   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6288   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6289   emit_int8(0x18);
6290   emit_int8((unsigned char)(0xC0 | encode));
6291   // 0x00 - insert into lower 128 bits
6292   // 0x01 - insert into upper 128 bits
6293   emit_int8(imm8 & 0x01);
6294 }
6295 
6296 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6297   assert(VM_Version::supports_avx(), "");
6298   assert(dst != xnoreg, "sanity");
6299   assert(imm8 <= 0x01, "imm8: %u", imm8);
6300   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6301   InstructionMark im(this);
6302   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6303   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6304   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6305   emit_int8(0x18);
6306   emit_operand(dst, src);
6307   // 0x00 - insert into lower 128 bits
6308   // 0x01 - insert into upper 128 bits
6309   emit_int8(imm8 & 0x01);
6310 }
6311 
6312 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6313   assert(VM_Version::supports_evex(), "");
6314   assert(imm8 <= 0x03, "imm8: %u", imm8);
6315   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6316   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6317   emit_int8(0x18);
6318   emit_int8((unsigned char)(0xC0 | encode));
6319   // 0x00 - insert into q0 128 bits (0..127)
6320   // 0x01 - insert into q1 128 bits (128..255)
6321   // 0x02 - insert into q2 128 bits (256..383)
6322   // 0x03 - insert into q3 128 bits (384..511)
6323   emit_int8(imm8 & 0x03);
6324 }
6325 
6326 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6327   assert(VM_Version::supports_avx(), "");
6328   assert(dst != xnoreg, "sanity");
6329   assert(imm8 <= 0x03, "imm8: %u", imm8);
6330   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6331   InstructionMark im(this);
6332   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6333   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6334   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6335   emit_int8(0x18);
6336   emit_operand(dst, src);
6337   // 0x00 - insert into q0 128 bits (0..127)
6338   // 0x01 - insert into q1 128 bits (128..255)
6339   // 0x02 - insert into q2 128 bits (256..383)
6340   // 0x03 - insert into q3 128 bits (384..511)
6341   emit_int8(imm8 & 0x03);
6342 }
6343 
6344 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6345   assert(VM_Version::supports_evex(), "");
6346   assert(imm8 <= 0x01, "imm8: %u", imm8);
6347   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6348   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6349   emit_int8(0x1A);
6350   emit_int8((unsigned char)(0xC0 | encode));
6351   // 0x00 - insert into lower 256 bits
6352   // 0x01 - insert into upper 256 bits
6353   emit_int8(imm8 & 0x01);
6354 }
6355 
6356 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6357   assert(VM_Version::supports_evex(), "");
6358   assert(dst != xnoreg, "sanity");
6359   assert(imm8 <= 0x01, "imm8: %u", imm8);
6360   InstructionMark im(this);
6361   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6362   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
6363   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6364   emit_int8(0x1A);
6365   emit_operand(dst, src);
6366   // 0x00 - insert into lower 256 bits
6367   // 0x01 - insert into upper 256 bits
6368   emit_int8(imm8 & 0x01);
6369 }
6370 
6371 
6372 // vextracti forms
6373 
6374 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6375   assert(VM_Version::supports_avx(), "");
6376   assert(imm8 <= 0x01, "imm8: %u", imm8);
6377   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6378   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6379   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6380   emit_int8(0x39);
6381   emit_int8((unsigned char)(0xC0 | encode));
6382   // 0x00 - extract from lower 128 bits
6383   // 0x01 - extract from upper 128 bits
6384   emit_int8(imm8 & 0x01);
6385 }
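
     // Worked example, for orientation (low-bank registers): vextracti128(xmm1, xmm2, 1)
     // assembles to C4 E3 7D 39 D1 01; note that the source register lands in
     // ModRM.reg and the destination in ModRM.rm, the reverse of the insert forms.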
6386 
6387 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
6388   assert(VM_Version::supports_avx2(), "");
6389   assert(src != xnoreg, "sanity");
6390   assert(imm8 <= 0x01, "imm8: %u", imm8);
6391   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6392   InstructionMark im(this);
6393   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6394   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6395   attributes.reset_is_clear_context();
6396   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6397   emit_int8(0x39);
6398   emit_operand(src, dst);
6399   // 0x00 - extract from lower 128 bits
6400   // 0x01 - extract from upper 128 bits
6401   emit_int8(imm8 & 0x01);
6402 }
6403 
6404 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6405   assert(VM_Version::supports_avx(), "");
6406   assert(imm8 <= 0x03, "imm8: %u", imm8);
6407   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6408   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6409   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6410   emit_int8(0x39);
6411   emit_int8((unsigned char)(0xC0 | encode));
6412   // 0x00 - extract from bits 127:0
6413   // 0x01 - extract from bits 255:128
6414   // 0x02 - extract from bits 383:256
6415   // 0x03 - extract from bits 511:384
6416   emit_int8(imm8 & 0x03);
6417 }
6418 
6419 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
6420   assert(VM_Version::supports_evex(), "");
6421   assert(src != xnoreg, "sanity");
6422   assert(imm8 <= 0x03, "imm8: %u", imm8);
6423   InstructionMark im(this);
6424   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6425   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6426   attributes.reset_is_clear_context();
6427   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6428   emit_int8(0x39);
6429   emit_operand(src, dst);
6430   // 0x00 - extract from bits 127:0
6431   // 0x01 - extract from bits 255:128
6432   // 0x02 - extract from bits 383:256
6433   // 0x03 - extract from bits 511:384
6434   emit_int8(imm8 & 0x03);
6435 }
6436 
6437 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6438   assert(VM_Version::supports_avx512dq(), "");
6439   assert(imm8 <= 0x03, "imm8: %u", imm8);
6440   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6441   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6442   emit_int8(0x39);
6443   emit_int8((unsigned char)(0xC0 | encode));
6444   // 0x00 - extract from bits 127:0
6445   // 0x01 - extract from bits 255:128
6446   // 0x02 - extract from bits 383:256
6447   // 0x03 - extract from bits 511:384
6448   emit_int8(imm8 & 0x03);
6449 }
6450 
6451 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6452   assert(VM_Version::supports_evex(), "");
6453   assert(imm8 <= 0x01, "imm8: %u", imm8);
6454   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6455   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6456   emit_int8(0x3B);
6457   emit_int8((unsigned char)(0xC0 | encode));
6458   // 0x00 - extract from lower 256 bits
6459   // 0x01 - extract from upper 256 bits
6460   emit_int8(imm8 & 0x01);
6461 }
6462 
6463 
6464 // vextractf forms
6465 
6466 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6467   assert(VM_Version::supports_avx(), "");
6468   assert(imm8 <= 0x01, "imm8: %u", imm8);
6469   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6470   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6471   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6472   emit_int8(0x19);
6473   emit_int8((unsigned char)(0xC0 | encode));
6474   // 0x00 - extract from lower 128 bits
6475   // 0x01 - extract from upper 128 bits
6476   emit_int8(imm8 & 0x01);
6477 }
6478 
6479 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
6480   assert(VM_Version::supports_avx(), "");
6481   assert(src != xnoreg, "sanity");
6482   assert(imm8 <= 0x01, "imm8: %u", imm8);
6483   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6484   InstructionMark im(this);
6485   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6486   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6487   attributes.reset_is_clear_context();
6488   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6489   emit_int8(0x19);
6490   emit_operand(src, dst);
6491   // 0x00 - extract from lower 128 bits
6492   // 0x01 - extract from upper 128 bits
6493   emit_int8(imm8 & 0x01);
6494 }
6495 
6496 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6497   assert(VM_Version::supports_avx(), "");
6498   assert(imm8 <= 0x03, "imm8: %u", imm8);
6499   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6500   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6501   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6502   emit_int8(0x19);
6503   emit_int8((unsigned char)(0xC0 | encode));
6504   // 0x00 - extract from bits 127:0
6505   // 0x01 - extract from bits 255:128
6506   // 0x02 - extract from bits 383:256
6507   // 0x03 - extract from bits 511:384
6508   emit_int8(imm8 & 0x03);
6509 }
6510 
6511 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
6512   assert(VM_Version::supports_evex(), "");
6513   assert(src != xnoreg, "sanity");
6514   assert(imm8 <= 0x03, "imm8: %u", imm8);
6515   InstructionMark im(this);
6516   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6517   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6518   attributes.reset_is_clear_context();
6519   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6520   emit_int8(0x19);
6521   emit_operand(src, dst);
6522   // 0x00 - extract from bits 127:0
6523   // 0x01 - extract from bits 255:128
6524   // 0x02 - extract from bits 383:256
6525   // 0x03 - extract from bits 511:384
6526   emit_int8(imm8 & 0x03);
6527 }
6528 
6529 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6530   assert(VM_Version::supports_avx512dq(), "");
6531   assert(imm8 <= 0x03, "imm8: %u", imm8);
6532   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6533   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6534   emit_int8(0x19);
6535   emit_int8((unsigned char)(0xC0 | encode));
6536   // 0x00 - extract from bits 127:0
6537   // 0x01 - extract from bits 255:128
6538   // 0x02 - extract from bits 383:256
6539   // 0x03 - extract from bits 511:384
6540   emit_int8(imm8 & 0x03);
6541 }
6542 
6543 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6544   assert(VM_Version::supports_evex(), "");
6545   assert(imm8 <= 0x01, "imm8: %u", imm8);
6546   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6547   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6548   emit_int8(0x1B);
6549   emit_int8((unsigned char)(0xC0 | encode));
6550   // 0x00 - extract from lower 256 bits
6551   // 0x01 - extract from upper 256 bits
6552   emit_int8(imm8 & 0x01);
6553 }
6554 
6555 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
6556   assert(VM_Version::supports_evex(), "");
6557   assert(src != xnoreg, "sanity");
6558   assert(imm8 <= 0x01, "imm8: %u", imm8);
6559   InstructionMark im(this);
6560   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6561   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
6562   attributes.reset_is_clear_context();
6563   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6564   emit_int8(0x1B);
6565   emit_operand(src, dst);
6566   // 0x00 - extract from lower 256 bits
6567   // 0x01 - extract from upper 256 bits
6568   emit_int8(imm8 & 0x01);
6569 }
6570 
6571 
6572 // legacy word/dword replicate
6573 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
6574   assert(VM_Version::supports_avx2(), "");
6575   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6576   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6577   emit_int8(0x79);
6578   emit_int8((unsigned char)(0xC0 | encode));
6579 }
6580 
6581 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
6582   assert(VM_Version::supports_avx2(), "");
6583   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6584   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6585   emit_int8(0x58);
6586   emit_int8((unsigned char)(0xC0 | encode));
6587 }
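
     // Worked example, for orientation (AVX2-only machine, low-bank registers):
     // vpbroadcastd(xmm0, xmm1) assembles to C4 E2 7D 58 C1, a 3-byte VEX prefix
     // (0F 38 map, 256-bit length), opcode 0x58, then ModRM 0xC1.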
6588 
6589 
6590 // xmm/mem sourced byte/word/dword/qword replicate
6591 
6592 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6593 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
6594   assert(VM_Version::supports_evex(), "");
6595   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6596   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6597   emit_int8(0x78);
6598   emit_int8((unsigned char)(0xC0 | encode));
6599 }
6600 
6601 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
6602   assert(VM_Version::supports_evex(), "");
6603   assert(dst != xnoreg, "sanity");
6604   InstructionMark im(this);
6605   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6606   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
6607   // swap src<->dst for encoding
6608   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6609   emit_int8(0x78);
6610   emit_operand(dst, src);
6611 }
6612 
6613 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6614 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
6615   assert(VM_Version::supports_evex(), "");
6616   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6617   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6618   emit_int8(0x79);
6619   emit_int8((unsigned char)(0xC0 | encode));
6620 }
6621 
6622 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
6623   assert(VM_Version::supports_evex(), "");
6624   assert(dst != xnoreg, "sanity");
6625   InstructionMark im(this);
6626   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6627   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
6628   // swap src<->dst for encoding
6629   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6630   emit_int8(0x79);
6631   emit_operand(dst, src);
6632 }
6633 
6634 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6635 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
6636   assert(VM_Version::supports_evex(), "");
6637   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6638   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6639   emit_int8(0x58);
6640   emit_int8((unsigned char)(0xC0 | encode));
6641 }
6642 
6643 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
6644   assert(VM_Version::supports_evex(), "");
6645   assert(dst != xnoreg, "sanity");
6646   InstructionMark im(this);
6647   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6648   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6649   // swap src<->dst for encoding
6650   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6651   emit_int8(0x58);
6652   emit_operand(dst, src);
6653 }
6654 
6655 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6656 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
6657   assert(VM_Version::supports_evex(), "");
6658   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6659   attributes.set_rex_vex_w_reverted();
6660   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6661   emit_int8(0x59);
6662   emit_int8((unsigned char)(0xC0 | encode));
6663 }
6664 
6665 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
6666   assert(VM_Version::supports_evex(), "");
6667   assert(dst != xnoreg, "sanity");
6668   InstructionMark im(this);
6669   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6670   attributes.set_rex_vex_w_reverted();
6671   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6672   // swap src<->dst for encoding
6673   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6674   emit_int8(0x59);
6675   emit_operand(dst, src);
6676 }
6677 
6678 
6679 // scalar single/double precision replicate
6680 
6681 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
6682 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
6683   assert(VM_Version::supports_evex(), "");
6684   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6685   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6686   emit_int8(0x18);
6687   emit_int8((unsigned char)(0xC0 | encode));
6688 }
6689 
6690 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
6691   assert(VM_Version::supports_evex(), "");
6692   assert(dst != xnoreg, "sanity");
6693   InstructionMark im(this);
6694   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6695   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6696   // swap src<->dst for encoding
6697   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6698   emit_int8(0x18);
6699   emit_operand(dst, src);
6700 }
6701 
6702 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
6703 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
6704   assert(VM_Version::supports_evex(), "");
6705   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6706   attributes.set_rex_vex_w_reverted();
6707   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6708   emit_int8(0x19);
6709   emit_int8((unsigned char)(0xC0 | encode));
6710 }
6711 
6712 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
6713   assert(VM_Version::supports_evex(), "");
6714   assert(dst != xnoreg, "sanity");
6715   InstructionMark im(this);
6716   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6717   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6718   attributes.set_rex_vex_w_reverted();
6719   // swap src<->dst for encoding
6720   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6721   emit_int8(0x19);
6722   emit_operand(dst, src);
6723 }
6724 
6725 
6726 // gpr source broadcast forms
6727 
6728 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6729 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
6730   assert(VM_Version::supports_evex(), "");
6731   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6732   attributes.set_is_evex_instruction();
6733   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6734   emit_int8(0x7A);
6735   emit_int8((unsigned char)(0xC0 | encode));
6736 }
6737 
6738 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6739 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
6740   assert(VM_Version::supports_evex(), "");
6741   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6742   attributes.set_is_evex_instruction();
6743   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6744   emit_int8(0x7B);
6745   emit_int8((unsigned char)(0xC0 | encode));
6746 }
6747 
6748 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6749 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
6750   assert(VM_Version::supports_evex(), "");
6751   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6752   attributes.set_is_evex_instruction();
6753   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6754   emit_int8(0x7C);
6755   emit_int8((unsigned char)(0xC0 | encode));
6756 }
6757 
6758 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6759 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
6760   assert(VM_Version::supports_evex(), "");
6761   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6762   attributes.set_is_evex_instruction();
6763   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6764   emit_int8(0x7C);
6765   emit_int8((unsigned char)(0xC0 | encode));
6766 }
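
     // Usage sketch (illustrative operands): evpbroadcastd(xmm0, rbx, AVX_512bit)
     // replicates the low 32 bits of rbx into all sixteen dword lanes of the
     // corresponding zmm register; these GPR-sourced forms exist only as EVEX.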
6767 
6768 
6769 // Carry-Less Multiplication Quadword
6770 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
6771   assert(VM_Version::supports_clmul(), "");
6772   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6773   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6774   emit_int8(0x44);
6775   emit_int8((unsigned char)(0xC0 | encode));
6776   emit_int8((unsigned char)mask);
6777 }
6778 
6779 // Carry-Less Multiplication Quadword
6780 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
6781   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
6782   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6783   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6784   emit_int8(0x44);
6785   emit_int8((unsigned char)(0xC0 | encode));
6786   emit_int8((unsigned char)mask);
6787 }
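
     // For both forms the mask selects the 64-bit halves to be multiplied:
     // bit 0 picks the qword of the first source and bit 4 that of the second,
     // so 0x00 multiplies the two low qwords and 0x11 the two high ones.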
6788 
6789 void Assembler::vzeroupper() {
6790   if (VM_Version::supports_vzeroupper()) {
6791     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
6792     (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6793     emit_int8(0x77);
6794   }
6795 }
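
     // vzeroupper itself assembles to C5 F8 77; it clears the upper bits of all
     // ymm registers so that following legacy SSE code avoids AVX-SSE transition
     // penalties.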
6796 
6797 #ifndef _LP64
6798 // 32bit only pieces of the assembler
6799 
6800 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
6801   // NO PREFIX AS NEVER 64BIT
6802   InstructionMark im(this);
6803   emit_int8((unsigned char)0x81);
6804   emit_int8((unsigned char)(0xF8 | src1->encoding()));
6805   emit_data(imm32, rspec, 0);
6806 }
6807 
6808 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
6809   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
6810   InstructionMark im(this);
6811   emit_int8((unsigned char)0x81);
6812   emit_operand(rdi, src1);
6813   emit_data(imm32, rspec, 0);
6814 }
6815 
6816 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
6817 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
6818 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
6819 void Assembler::cmpxchg8(Address adr) {
6820   InstructionMark im(this);
6821   emit_int8(0x0F);
6822   emit_int8((unsigned char)0xC7);
6823   emit_operand(rcx, adr);
6824 }
6825 
6826 void Assembler::decl(Register dst) {
6827   // Don't use it directly. Use MacroAssembler::decrementl() instead.
6828   emit_int8(0x48 | dst->encoding());
6829 }
6830 
6831 #endif // _LP64
6832 
6833 // 64bit typically doesn't use the x87, but still needs it for the trig functions
6834 
6835 void Assembler::fabs() {
6836   emit_int8((unsigned char)0xD9);
6837   emit_int8((unsigned char)0xE1);
6838 }
6839 
6840 void Assembler::fadd(int i) {
6841   emit_farith(0xD8, 0xC0, i);
6842 }
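
     // For orientation: emit_farith(b1, b2, i) emits the two bytes b1 and b2 + i,
     // so fadd(2) assembles to D8 C2, i.e. FADD ST(0), ST(2).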
6843 
6844 void Assembler::fadd_d(Address src) {
6845   InstructionMark im(this);
6846   emit_int8((unsigned char)0xDC);
6847   emit_operand32(rax, src);
6848 }
6849 
6850 void Assembler::fadd_s(Address src) {
6851   InstructionMark im(this);
6852   emit_int8((unsigned char)0xD8);
6853   emit_operand32(rax, src);
6854 }
6855 
6856 void Assembler::fadda(int i) {
6857   emit_farith(0xDC, 0xC0, i);
6858 }
6859 
6860 void Assembler::faddp(int i) {
6861   emit_farith(0xDE, 0xC0, i);
6862 }
6863 
6864 void Assembler::fchs() {
6865   emit_int8((unsigned char)0xD9);
6866   emit_int8((unsigned char)0xE0);
6867 }
6868 
6869 void Assembler::fcom(int i) {
6870   emit_farith(0xD8, 0xD0, i);
6871 }
6872 
6873 void Assembler::fcomp(int i) {
6874   emit_farith(0xD8, 0xD8, i);
6875 }
6876 
6877 void Assembler::fcomp_d(Address src) {
6878   InstructionMark im(this);
6879   emit_int8((unsigned char)0xDC);
6880   emit_operand32(rbx, src);
6881 }
6882 
6883 void Assembler::fcomp_s(Address src) {
6884   InstructionMark im(this);
6885   emit_int8((unsigned char)0xD8);
6886   emit_operand32(rbx, src);
6887 }
6888 
6889 void Assembler::fcompp() {
6890   emit_int8((unsigned char)0xDE);
6891   emit_int8((unsigned char)0xD9);
6892 }
6893 
6894 void Assembler::fcos() {
6895   emit_int8((unsigned char)0xD9);
6896   emit_int8((unsigned char)0xFF);
6897 }
6898 
6899 void Assembler::fdecstp() {
6900   emit_int8((unsigned char)0xD9);
6901   emit_int8((unsigned char)0xF6);
6902 }
6903 
6904 void Assembler::fdiv(int i) {
6905   emit_farith(0xD8, 0xF0, i);
6906 }
6907 
6908 void Assembler::fdiv_d(Address src) {
6909   InstructionMark im(this);
6910   emit_int8((unsigned char)0xDC);
6911   emit_operand32(rsi, src);
6912 }
6913 
6914 void Assembler::fdiv_s(Address src) {
6915   InstructionMark im(this);
6916   emit_int8((unsigned char)0xD8);
6917   emit_operand32(rsi, src);
6918 }
6919 
6920 void Assembler::fdiva(int i) {
6921   emit_farith(0xDC, 0xF8, i);
6922 }
6923 
6924 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
6925 //       is erroneous for some of the floating-point instructions below.
6926 
6927 void Assembler::fdivp(int i) {
6928   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
6929 }
6930 
6931 void Assembler::fdivr(int i) {
6932   emit_farith(0xD8, 0xF8, i);
6933 }
6934 
6935 void Assembler::fdivr_d(Address src) {
6936   InstructionMark im(this);
6937   emit_int8((unsigned char)0xDC);
6938   emit_operand32(rdi, src);
6939 }
6940 
6941 void Assembler::fdivr_s(Address src) {
6942   InstructionMark im(this);
6943   emit_int8((unsigned char)0xD8);
6944   emit_operand32(rdi, src);
6945 }
6946 
6947 void Assembler::fdivra(int i) {
6948   emit_farith(0xDC, 0xF0, i);
6949 }
6950 
6951 void Assembler::fdivrp(int i) {
6952   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
6953 }
6954 
6955 void Assembler::ffree(int i) {
6956   emit_farith(0xDD, 0xC0, i);
6957 }
6958 
6959 void Assembler::fild_d(Address adr) {
6960   InstructionMark im(this);
6961   emit_int8((unsigned char)0xDF);
6962   emit_operand32(rbp, adr);
6963 }
6964 
6965 void Assembler::fild_s(Address adr) {
6966   InstructionMark im(this);
6967   emit_int8((unsigned char)0xDB);
6968   emit_operand32(rax, adr);
6969 }
6970 
6971 void Assembler::fincstp() {
6972   emit_int8((unsigned char)0xD9);
6973   emit_int8((unsigned char)0xF7);
6974 }
6975 
6976 void Assembler::finit() {
6977   emit_int8((unsigned char)0x9B);
6978   emit_int8((unsigned char)0xDB);
6979   emit_int8((unsigned char)0xE3);
6980 }
6981 
6982 void Assembler::fist_s(Address adr) {
6983   InstructionMark im(this);
6984   emit_int8((unsigned char)0xDB);
6985   emit_operand32(rdx, adr);
6986 }
6987 
6988 void Assembler::fistp_d(Address adr) {
6989   InstructionMark im(this);
6990   emit_int8((unsigned char)0xDF);
6991   emit_operand32(rdi, adr);
6992 }
6993 
6994 void Assembler::fistp_s(Address adr) {
6995   InstructionMark im(this);
6996   emit_int8((unsigned char)0xDB);
6997   emit_operand32(rbx, adr);
6998 }
6999 
7000 void Assembler::fld1() {
7001   emit_int8((unsigned char)0xD9);
7002   emit_int8((unsigned char)0xE8);
7003 }
7004 
7005 void Assembler::fld_d(Address adr) {
7006   InstructionMark im(this);
7007   emit_int8((unsigned char)0xDD);
7008   emit_operand32(rax, adr);
7009 }
7010 
7011 void Assembler::fld_s(Address adr) {
7012   InstructionMark im(this);
7013   emit_int8((unsigned char)0xD9);
7014   emit_operand32(rax, adr);
7015 }
7016 
7017 
7018 void Assembler::fld_s(int index) {
7019   emit_farith(0xD9, 0xC0, index);
7020 }
7021 
7022 void Assembler::fld_x(Address adr) {
7023   InstructionMark im(this);
7024   emit_int8((unsigned char)0xDB);
7025   emit_operand32(rbp, adr);
7026 }
7027 
7028 void Assembler::fldcw(Address src) {
7029   InstructionMark im(this);
7030   emit_int8((unsigned char)0xD9);
7031   emit_operand32(rbp, src);
7032 }
7033 
7034 void Assembler::fldenv(Address src) {
7035   InstructionMark im(this);
7036   emit_int8((unsigned char)0xD9);
7037   emit_operand32(rsp, src);
7038 }
7039 
7040 void Assembler::fldlg2() {
7041   emit_int8((unsigned char)0xD9);
7042   emit_int8((unsigned char)0xEC);
7043 }
7044 
7045 void Assembler::fldln2() {
7046   emit_int8((unsigned char)0xD9);
7047   emit_int8((unsigned char)0xED);
7048 }
7049 
7050 void Assembler::fldz() {
7051   emit_int8((unsigned char)0xD9);
7052   emit_int8((unsigned char)0xEE);
7053 }
7054 
7055 void Assembler::flog() {
7056   fldln2();
7057   fxch();
7058   fyl2x();
7059 }
7060 
7061 void Assembler::flog10() {
7062   fldlg2();
7063   fxch();
7064   fyl2x();
7065 }
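
     // Both helpers build on fyl2x, which computes ST(1) * log2(ST(0)) and pops:
     // flog pushes ln(2) and swaps so the stack holds { x, ln(2) }, yielding
     // ln(2) * log2(x) = ln(x); flog10 does the same with log10(2).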
7066 
7067 void Assembler::fmul(int i) {
7068   emit_farith(0xD8, 0xC8, i);
7069 }
7070 
7071 void Assembler::fmul_d(Address src) {
7072   InstructionMark im(this);
7073   emit_int8((unsigned char)0xDC);
7074   emit_operand32(rcx, src);
7075 }
7076 
7077 void Assembler::fmul_s(Address src) {
7078   InstructionMark im(this);
7079   emit_int8((unsigned char)0xD8);
7080   emit_operand32(rcx, src);
7081 }
7082 
7083 void Assembler::fmula(int i) {
7084   emit_farith(0xDC, 0xC8, i);
7085 }
7086 
7087 void Assembler::fmulp(int i) {
7088   emit_farith(0xDE, 0xC8, i);
7089 }
7090 
7091 void Assembler::fnsave(Address dst) {
7092   InstructionMark im(this);
7093   emit_int8((unsigned char)0xDD);
7094   emit_operand32(rsi, dst);
7095 }
7096 
7097 void Assembler::fnstcw(Address src) {
7098   InstructionMark im(this);
7099   emit_int8((unsigned char)0x9B);
7100   emit_int8((unsigned char)0xD9);
7101   emit_operand32(rdi, src);
7102 }
7103 
7104 void Assembler::fnstsw_ax() {
7105   emit_int8((unsigned char)0xDF);
7106   emit_int8((unsigned char)0xE0);
7107 }
7108 
7109 void Assembler::fprem() {
7110   emit_int8((unsigned char)0xD9);
7111   emit_int8((unsigned char)0xF8);
7112 }
7113 
7114 void Assembler::fprem1() {
7115   emit_int8((unsigned char)0xD9);
7116   emit_int8((unsigned char)0xF5);
7117 }
7118 
7119 void Assembler::frstor(Address src) {
7120   InstructionMark im(this);
7121   emit_int8((unsigned char)0xDD);
7122   emit_operand32(rsp, src);
7123 }
7124 
7125 void Assembler::fsin() {
7126   emit_int8((unsigned char)0xD9);
7127   emit_int8((unsigned char)0xFE);
7128 }
7129 
7130 void Assembler::fsqrt() {
7131   emit_int8((unsigned char)0xD9);
7132   emit_int8((unsigned char)0xFA);
7133 }
7134 
7135 void Assembler::fst_d(Address adr) {
7136   InstructionMark im(this);
7137   emit_int8((unsigned char)0xDD);
7138   emit_operand32(rdx, adr);
7139 }
7140 
7141 void Assembler::fst_s(Address adr) {
7142   InstructionMark im(this);
7143   emit_int8((unsigned char)0xD9);
7144   emit_operand32(rdx, adr);
7145 }
7146 
7147 void Assembler::fstp_d(Address adr) {
7148   InstructionMark im(this);
7149   emit_int8((unsigned char)0xDD);
7150   emit_operand32(rbx, adr);
7151 }
7152 
7153 void Assembler::fstp_d(int index) {
7154   emit_farith(0xDD, 0xD8, index);
7155 }
7156 
7157 void Assembler::fstp_s(Address adr) {
7158   InstructionMark im(this);
7159   emit_int8((unsigned char)0xD9);
7160   emit_operand32(rbx, adr);
7161 }
7162 
7163 void Assembler::fstp_x(Address adr) {
7164   InstructionMark im(this);
7165   emit_int8((unsigned char)0xDB);
7166   emit_operand32(rdi, adr);
7167 }
7168 
7169 void Assembler::fsub(int i) {
7170   emit_farith(0xD8, 0xE0, i);
7171 }
7172 
7173 void Assembler::fsub_d(Address src) {
7174   InstructionMark im(this);
7175   emit_int8((unsigned char)0xDC);
7176   emit_operand32(rsp, src);
7177 }
7178 
7179 void Assembler::fsub_s(Address src) {
7180   InstructionMark im(this);
7181   emit_int8((unsigned char)0xD8);
7182   emit_operand32(rsp, src);
7183 }
7184 
7185 void Assembler::fsuba(int i) {
7186   emit_farith(0xDC, 0xE8, i);
7187 }
7188 
7189 void Assembler::fsubp(int i) {
7190   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
7191 }
7192 
7193 void Assembler::fsubr(int i) {
7194   emit_farith(0xD8, 0xE8, i);
7195 }
7196 
7197 void Assembler::fsubr_d(Address src) {
7198   InstructionMark im(this);
7199   emit_int8((unsigned char)0xDC);
7200   emit_operand32(rbp, src);
7201 }
7202 
7203 void Assembler::fsubr_s(Address src) {
7204   InstructionMark im(this);
7205   emit_int8((unsigned char)0xD8);
7206   emit_operand32(rbp, src);
7207 }
7208 
7209 void Assembler::fsubra(int i) {
7210   emit_farith(0xDC, 0xE0, i);
7211 }
7212 
7213 void Assembler::fsubrp(int i) {
7214   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
7215 }
7216 
7217 void Assembler::ftan() {
7218   emit_int8((unsigned char)0xD9);
7219   emit_int8((unsigned char)0xF2);
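       // fptan (D9 F2) computes tan(ST(0)) and pushes 1.0;
       // the trailing fstp st(0) (DD D8) pops that 1.0 again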
7220   emit_int8((unsigned char)0xDD);
7221   emit_int8((unsigned char)0xD8);
7222 }
7223 
7224 void Assembler::ftst() {
7225   emit_int8((unsigned char)0xD9);
7226   emit_int8((unsigned char)0xE4);
7227 }
7228 
7229 void Assembler::fucomi(int i) {
7230   // make sure the instruction is supported (introduced for P6, together with cmov)
7231   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7232   emit_farith(0xDB, 0xE8, i);
7233 }
7234 
7235 void Assembler::fucomip(int i) {
7236   // make sure the instruction is supported (introduced for P6, together with cmov)
7237   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7238   emit_farith(0xDF, 0xE8, i);
7239 }
7240 
7241 void Assembler::fwait() {
7242   emit_int8((unsigned char)0x9B);
7243 }
7244 
7245 void Assembler::fxch(int i) {
7246   emit_farith(0xD9, 0xC8, i);
7247 }
7248 
7249 void Assembler::fyl2x() {
7250   emit_int8((unsigned char)0xD9);
7251   emit_int8((unsigned char)0xF1);
7252 }
7253 
7254 void Assembler::frndint() {
7255   emit_int8((unsigned char)0xD9);
7256   emit_int8((unsigned char)0xFC);
7257 }
7258 
7259 void Assembler::f2xm1() {
7260   emit_int8((unsigned char)0xD9);
7261   emit_int8((unsigned char)0xF0);
7262 }
7263 
7264 void Assembler::fldl2e() {
7265   emit_int8((unsigned char)0xD9);
7266   emit_int8((unsigned char)0xEA);
7267 }
7268 
7269 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
7270 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
7271 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
7272 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
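
     // For example, pre = VEX_SIMD_F3 selects the 0xF3 prefix byte and
     // opc = VEX_OPCODE_0F_38 the 0x38 second escape byte, so the legacy
     // encoders below emit F3 [REX] 0F 38 before the opcode proper.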
7273 
7274 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
7275 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7276   if (pre > 0) {
7277     emit_int8(simd_pre[pre]);
7278   }
7279   if (rex_w) {
7280     prefixq(adr, xreg);
7281   } else {
7282     prefix(adr, xreg);
7283   }
7284   if (opc > 0) {
7285     emit_int8(0x0F);
7286     int opc2 = simd_opc[opc];
7287     if (opc2 > 0) {
7288       emit_int8(opc2);
7289     }
7290   }
7291 }
7292 
7293 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7294   if (pre > 0) {
7295     emit_int8(simd_pre[pre]);
7296   }
7297   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
7298   if (opc > 0) {
7299     emit_int8(0x0F);
7300     int opc2 = simd_opc[opc];
7301     if (opc2 > 0) {
7302       emit_int8(opc2);
7303     }
7304   }
7305   return encode;
7306 }
7307 
7308 
7309 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
7310   int vector_len = _attributes->get_vector_len();
7311   bool vex_w = _attributes->is_rex_vex_w();
7312   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
7313     prefix(VEX_3bytes);
7314 
7315     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
7316     byte1 = (~byte1) & 0xE0;
7317     byte1 |= opc;
7318     emit_int8(byte1);
7319 
7320     int byte2 = ((~nds_enc) & 0xf) << 3;
7321     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
7322     emit_int8(byte2);
7323   } else {
7324     prefix(VEX_2bytes);
7325 
7326     int byte1 = vex_r ? VEX_R : 0;
7327     byte1 = (~byte1) & 0x80;
7328     byte1 |= ((~nds_enc) & 0xf) << 3;
7329     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
7330     emit_int8(byte1);
7331   }
7332 }
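
     // Worked example, for orientation: when vpxor(xmm0, xmm0, xmm0) reaches
     // this VEX path, none of B, X or W is set and the opcode lives in the 0F
     // map, so the 2-byte form applies and the instruction assembles to
     // C5 F9 EF C0.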
7333 
7334 // This is a 4-byte encoding
7335 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
7336   // EVEX 0x62 prefix
7337   prefix(EVEX_4bytes);
7338   bool vex_w = _attributes->is_rex_vex_w();
7339   int evex_encoding = (vex_w ? VEX_W : 0);
7340   // EVEX.b is not currently used for broadcast of single element or data rounding modes
7341   _attributes->set_evex_encoding(evex_encoding);
7342 
7343   // P0: byte 2, initialized to RXBR'00mm
7344   // where R, X, B and R' are stored inverted (one's complement), hence the not below
7345   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
7346   byte2 = (~byte2) & 0xF0;
7347   // confine opc opcode extensions in mm bits to lower two bits
7348   // of form {0F, 0F_38, 0F_3A}
7349   byte2 |= opc;
7350   emit_int8(byte2);
7351 
7352   // P1: byte 3 as Wvvvv1pp
7353   int byte3 = ((~nds_enc) & 0xf) << 3;
7354   // p[10] is always 1
7355   byte3 |= EVEX_F;
7356   byte3 |= (vex_w & 1) << 7;
7357   // confine pre opcode extensions in pp bits to lower two bits
7358   // of form {66, F3, F2}
7359   byte3 |= pre;
7360   emit_int8(byte3);
7361 
7362   // P2: byte 4 as zL'Lb V'aaa
7363   // the opmask register is encoded in the low 3 bits as aaa: k0 when no mask register is used, else the embedded specifier (hard coded to k1 for now)
7364   int byte4 = (_attributes->is_no_reg_mask()) ?
7365               0 :
7366               _attributes->get_embedded_opmask_register_specifier();
7367   // EVEX.V' for extending EVEX.vvvv or VIDX
7368   byte4 |= (evex_v ? 0: EVEX_V);
7369   // third EVEX.b for broadcast actions
7370   byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
7371   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
7372   byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
7373   // last is EVEX.z for zero/merge actions
7374   if (_attributes->is_no_reg_mask() == false) {
7375     byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
7376   }
7377   emit_int8(byte4);
7378 }
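
     // As with VEX, the register-select fields (R, X, B, R', vvvv, V') are
     // stored inverted in the prefix, which is why the bytes above are built
     // from the one's complement of the raw register encodings.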
7379 
7380 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7381   bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
7382   bool vex_b = adr.base_needs_rex();
7383   bool vex_x = adr.index_needs_rex();
7384   set_attributes(attributes);
7385   attributes->set_current_assembler(this);
7386 
7387   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7388   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7389     switch (attributes->get_vector_len()) {
7390     case AVX_128bit:
7391     case AVX_256bit:
7392       attributes->set_is_legacy_mode();
7393       break;
7394     }
7395   }
7396 
7397   // For a pure EVEX instruction, check whether it is also
7398   // allowed in legacy mode and has resources which will
7399   // fit there.  Pure EVEX instructions call set_is_evex_instruction in their definition;
7400   // otherwise that field is set when we encode to EVEX below
7401   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7402       !_is_managed && !attributes->is_evex_instruction()) {
7403     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7404       bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7405       if (check_register_bank) {
7406         // check nds_enc and xreg_enc for upper bank usage
7407         if (nds_enc < 16 && xreg_enc < 16) {
7408           attributes->set_is_legacy_mode();
7409         }
7410       } else {
7411         attributes->set_is_legacy_mode();
7412       }
7413     }
7414   }
7415 
7416   _is_managed = false;
7417   if (UseAVX > 2 && !attributes->is_legacy_mode())
7418   {
7419     bool evex_r = (xreg_enc >= 16);
7420     bool evex_v = (nds_enc >= 16);
7421     attributes->set_is_evex_instruction();
7422     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7423   } else {
7424     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7425       attributes->set_rex_vex_w(false);
7426     }
7427     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7428   }
7429 }
7430 
7431 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7432   bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
7433   bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
7434   bool vex_x = false;
7435   set_attributes(attributes);
7436   attributes->set_current_assembler(this);
7437   bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7438 
7439   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7440   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7441     switch (attributes->get_vector_len()) {
7442     case AVX_128bit:
7443     case AVX_256bit:
7444       if (check_register_bank) {
7445         if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
7446           // an upper-bank operand (xmm16-31) requires EVEX, so promote to 512-bit to meet RA requirements
7447           attributes->set_vector_len(AVX_512bit);
7448         } else {
7449           attributes->set_is_legacy_mode();
7450         }
7451       } else {
7452         attributes->set_is_legacy_mode();
7453       }
7454       break;
7455     }
7456   }
7457 
7458   // For a pure EVEX instruction, check whether it is also
7459   // allowed in legacy mode and has resources which will
7460   // fit there.  Pure EVEX instructions call set_is_evex_instruction in their definition;
7461   // otherwise that field is set when we encode to EVEX below
7462   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7463       !_is_managed && !attributes->is_evex_instruction()) {
7464     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7465       if (check_register_bank) {
7466         // check dst_enc, nds_enc and src_enc for upper bank usage
7467         if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
7468           attributes->set_is_legacy_mode();
7469         }
7470       } else {
7471         attributes->set_is_legacy_mode();
7472       }
7473     }
7474   }
7475 
7476   _is_managed = false;
  if (UseAVX > 2 && !attributes->is_legacy_mode()) {
7479     bool evex_r = (dst_enc >= 16);
7480     bool evex_v = (nds_enc >= 16);
    // vex_x can serve as the upper-bank extender (EVEX.X) for the rm-encoded operand
7482     vex_x = (src_enc >= 16);
7483     attributes->set_is_evex_instruction();
7484     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7485   } else {
7486     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7487       attributes->set_rex_vex_w(false);
7488     }
7489     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7490   }
7491 
7492   // return modrm byte components for operands
7493   return (((dst_enc & 7) << 3) | (src_enc & 7));
7494 }
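
// A worked example of the return value above (illustrative, not emitted here):
// dst_enc = 1 and src_enc = 3 give (1 << 3) | 3 = 0x0B, which callers OR with
// 0xC0 to form the register-direct ModRM byte 0xCB (mod = 11, reg = 1, rm = 3).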
7495 
7496 
7497 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
7498                             VexOpcode opc, InstructionAttr *attributes) {
7499   if (UseAVX > 0) {
7500     int xreg_enc = xreg->encoding();
7501     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7502     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
7503   } else {
7504     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
7505     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
7506   }
7507 }
7508 
7509 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
7510                                       VexOpcode opc, InstructionAttr *attributes) {
7511   int dst_enc = dst->encoding();
7512   int src_enc = src->encoding();
7513   if (UseAVX > 0) {
7514     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7515     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
7516   } else {
7517     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
7518     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
7519   }
7520 }
7521 
7522 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7523   assert(VM_Version::supports_avx(), "");
7524   assert(!VM_Version::supports_evex(), "");
7525   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7526   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7527   emit_int8((unsigned char)0xC2);
7528   emit_int8((unsigned char)(0xC0 | encode));
7529   emit_int8((unsigned char)(0xF & cop));
7530 }
7531 
7532 void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7533   assert(VM_Version::supports_avx(), "");
7534   assert(!VM_Version::supports_evex(), "");
7535   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7536   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7537   emit_int8((unsigned char)0x4B);
7538   emit_int8((unsigned char)(0xC0 | encode));
7539   int src2_enc = src2->encoding();
  emit_int8((unsigned char)(0xF0 & (src2_enc << 4)));
7541 }
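
// The trailing byte emitted above is the VEX "is4" immediate: the selector
// register src2 travels in imm8[7:4], so e.g. (illustrative) src2 = xmm5
// produces the byte 0x50.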
7542 
7543 void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7544   assert(VM_Version::supports_avx(), "");
7545   assert(!VM_Version::supports_evex(), "");
7546   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7547   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
7548   emit_int8((unsigned char)0xC2);
7549   emit_int8((unsigned char)(0xC0 | encode));
7550   emit_int8((unsigned char)(0xF & cop));
7551 }
7552 
7553 void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7554   assert(VM_Version::supports_avx(), "");
7555   assert(!VM_Version::supports_evex(), "");
7556   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7557   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7558   emit_int8((unsigned char)0x4A);
7559   emit_int8((unsigned char)(0xC0 | encode));
7560   int src2_enc = src2->encoding();
  emit_int8((unsigned char)(0xF0 & (src2_enc << 4)));
7562 }
7563 
7564 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
7565   assert(VM_Version::supports_avx2(), "");
7566   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7567   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7568   emit_int8((unsigned char)0x02);
7569   emit_int8((unsigned char)(0xC0 | encode));
7570   emit_int8((unsigned char)imm8);
7571 }
7572 
7573 void Assembler::shlxl(Register dst, Register src1, Register src2) {
7574   assert(VM_Version::supports_bmi2(), "");
7575   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7576   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7577   emit_int8((unsigned char)0xF7);
7578   emit_int8((unsigned char)(0xC0 | encode));
7579 }
7580 
7581 void Assembler::shlxq(Register dst, Register src1, Register src2) {
7582   assert(VM_Version::supports_bmi2(), "");
7583   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7584   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7585   emit_int8((unsigned char)0xF7);
7586   emit_int8((unsigned char)(0xC0 | encode));
7587 }
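
// Operand-order note for the two BMI2 shifts above: shlxl/shlxq compute
// dst = src1 << (src2 masked to the operand width), and the count register
// src2 is the one carried in VEX.vvvv -- hence it is passed as the nds
// argument to vex_prefix_and_encode().  Illustratively, shlxq(rax, rbx, rcx)
// yields rax = rbx << (rcx & 63).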
7588 
7589 #ifndef _LP64
7590 
7591 void Assembler::incl(Register dst) {
7592   // Don't use it directly. Use MacroAssembler::incrementl() instead.
7593   emit_int8(0x40 | dst->encoding());
7594 }
7595 
7596 void Assembler::lea(Register dst, Address src) {
7597   leal(dst, src);
7598 }
7599 
7600 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
7601   InstructionMark im(this);
7602   emit_int8((unsigned char)0xC7);
7603   emit_operand(rax, dst);
7604   emit_data((int)imm32, rspec, 0);
7605 }
7606 
7607 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7608   InstructionMark im(this);
7609   int encode = prefix_and_encode(dst->encoding());
7610   emit_int8((unsigned char)(0xB8 | encode));
7611   emit_data((int)imm32, rspec, 0);
7612 }
7613 
7614 void Assembler::popa() { // 32bit
7615   emit_int8(0x61);
7616 }
7617 
7618 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
7619   InstructionMark im(this);
7620   emit_int8(0x68);
7621   emit_data(imm32, rspec, 0);
7622 }
7623 
7624 void Assembler::pusha() { // 32bit
7625   emit_int8(0x60);
7626 }
7627 
7628 void Assembler::set_byte_if_not_zero(Register dst) {
7629   emit_int8(0x0F);
7630   emit_int8((unsigned char)0x95);
7631   emit_int8((unsigned char)(0xE0 | dst->encoding()));
7632 }
7633 
7634 void Assembler::shldl(Register dst, Register src) {
7635   emit_int8(0x0F);
7636   emit_int8((unsigned char)0xA5);
7637   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7638 }
7639 
7640 // 0F A4 / r ib
7641 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
7642   emit_int8(0x0F);
7643   emit_int8((unsigned char)0xA4);
7644   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7645   emit_int8(imm8);
7646 }
7647 
7648 void Assembler::shrdl(Register dst, Register src) {
7649   emit_int8(0x0F);
7650   emit_int8((unsigned char)0xAD);
7651   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7652 }
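
// For the double shifts above, the ModRM byte is 0xC0 | src << 3 | dst and
// the shift count lives in CL (or in the trailing imm8 form).  Illustrative
// example: shldl(rax, rdx) emits 0F A5 D0, i.e. SHLD EAX, EDX, CL.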
7653 
7654 #else // LP64
7655 
7656 void Assembler::set_byte_if_not_zero(Register dst) {
7657   int enc = prefix_and_encode(dst->encoding(), true);
7658   emit_int8(0x0F);
7659   emit_int8((unsigned char)0x95);
7660   emit_int8((unsigned char)(0xE0 | enc));
7661 }
7662 
// 64-bit-only pieces of the assembler.
// The check below should only be used by 64-bit instructions that can use
// rip-relative addressing; it cannot be used by instructions that want an
// immediate value.
7666 
7667 bool Assembler::reachable(AddressLiteral adr) {
7668   int64_t disp;
  // A reloc of none will force a 64-bit literal into the code stream.  It is
  // likely a placeholder for something that will be patched later, and we
  // need to be certain it will always be reachable.
7672   if (adr.reloc() == relocInfo::none) {
7673     return false;
7674   }
7675   if (adr.reloc() == relocInfo::internal_word_type) {
7676     // This should be rip relative and easily reachable.
7677     return true;
7678   }
7679   if (adr.reloc() == relocInfo::virtual_call_type ||
7680       adr.reloc() == relocInfo::opt_virtual_call_type ||
7681       adr.reloc() == relocInfo::static_call_type ||
7682       adr.reloc() == relocInfo::static_stub_type ) {
7683     // This should be rip relative within the code cache and easily
7684     // reachable until we get huge code caches. (At which point
7685     // ic code is going to have issues).
7686     return true;
7687   }
7688   if (adr.reloc() != relocInfo::external_word_type &&
7689       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
7690       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
7691       adr.reloc() != relocInfo::runtime_call_type ) {
7692     return false;
7693   }
7694 
  // Stress the correction code
  if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if the target is in the code cache.
    // Flipping addresses in the code cache to be unreachable causes issues
    // with things like inline caches, where the additional instructions
    // are not handled.
7701     if (CodeCache::find_blob(adr._target) == NULL) {
7702       return false;
7703     }
7704   }
  // For external_word_type/runtime_call_type, if the target is reachable both
  // from where we are now (possibly a temp buffer) and from anywhere we might
  // end up in the CodeCache, then we are always reachable.
  // This would have to become more pessimistic if we ever save/restore
  // shared code.
7710   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
7711   if (!is_simm32(disp)) return false;
7712   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
7713   if (!is_simm32(disp)) return false;
7714 
7715   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
7716 
  // Because a rip-relative address is disp + address_of_next_instruction and
  // we don't know address_of_next_instruction yet, we apply a fudge factor to
  // make sure we will be ok no matter the size of the instruction this
  // displacement ends up in.
  // We don't have to fudge the checks above because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, 4-byte literal;
  // + 4 because better safe than sorry.
7724   const int fudge = 12 + 4;
7725   if (disp < 0) {
7726     disp -= fudge;
7727   } else {
7728     disp += fudge;
7729   }
7730   return is_simm32(disp);
7731 }
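
// Illustrative numbers (not from the source): with pc() = 0x00007f0000000000
// and a target at 0x00007f0000001000, disp is 0xFFC; adding the +16 fudge
// still leaves it far inside the signed-32-bit window, so the caller may
// safely emit a rip-relative form.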
7732 
// Returns true if the polling page is not reachable from the code cache
// using rip-relative addressing.
7735 bool Assembler::is_polling_page_far() {
7736   intptr_t addr = (intptr_t)os::get_polling_page();
7737   return ForceUnreachable ||
7738          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
7739          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
7740 }
7741 
7742 void Assembler::emit_data64(jlong data,
7743                             relocInfo::relocType rtype,
7744                             int format) {
7745   if (rtype == relocInfo::none) {
7746     emit_int64(data);
7747   } else {
7748     emit_data64(data, Relocation::spec_simple(rtype), format);
7749   }
7750 }
7751 
7752 void Assembler::emit_data64(jlong data,
7753                             RelocationHolder const& rspec,
7754                             int format) {
7755   assert(imm_operand == 0, "default format must be immediate in this file");
7756   assert(imm_operand == format, "must be immediate");
7757   assert(inst_mark() != NULL, "must be inside InstructionMark");
7758   // Do not use AbstractAssembler::relocate, which is not intended for
7759   // embedded words.  Instead, relocate to the enclosing instruction.
7760   code_section()->relocate(inst_mark(), rspec, format);
7761 #ifdef ASSERT
7762   check_relocation(rspec, format);
7763 #endif
7764   emit_int64(data);
7765 }
7766 
7767 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
7768   if (reg_enc >= 8) {
7769     prefix(REX_B);
7770     reg_enc -= 8;
7771   } else if (byteinst && reg_enc >= 4) {
7772     prefix(REX);
7773   }
7774   return reg_enc;
7775 }
7776 
7777 int Assembler::prefixq_and_encode(int reg_enc) {
7778   if (reg_enc < 8) {
7779     prefix(REX_W);
7780   } else {
7781     prefix(REX_WB);
7782     reg_enc -= 8;
7783   }
7784   return reg_enc;
7785 }
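
// The Prefix values used above encode the REX byte 0100WRXB directly, e.g.
// REX_W = 0x48 and REX_WB = 0x49.  Illustrative call: prefixq_and_encode(10)
// (r10) emits 0x49 and returns 2, the low three bits destined for ModRM.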
7786 
7787 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
7788   if (dst_enc < 8) {
7789     if (src_enc >= 8) {
7790       prefix(REX_B);
7791       src_enc -= 8;
7792     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
7793       prefix(REX);
7794     }
7795   } else {
7796     if (src_enc < 8) {
7797       prefix(REX_R);
7798     } else {
7799       prefix(REX_RB);
7800       src_enc -= 8;
7801     }
7802     dst_enc -= 8;
7803   }
7804   return dst_enc << 3 | src_enc;
7805 }
7806 
7807 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
7808   if (dst_enc < 8) {
7809     if (src_enc < 8) {
7810       prefix(REX_W);
7811     } else {
7812       prefix(REX_WB);
7813       src_enc -= 8;
7814     }
7815   } else {
7816     if (src_enc < 8) {
7817       prefix(REX_WR);
7818     } else {
7819       prefix(REX_WRB);
7820       src_enc -= 8;
7821     }
7822     dst_enc -= 8;
7823   }
7824   return dst_enc << 3 | src_enc;
7825 }
7826 
7827 void Assembler::prefix(Register reg) {
7828   if (reg->encoding() >= 8) {
7829     prefix(REX_B);
7830   }
7831 }
7832 
7833 void Assembler::prefix(Register dst, Register src, Prefix p) {
7834   if (src->encoding() >= 8) {
7835     p = (Prefix)(p | REX_B);
7836   }
7837   if (dst->encoding() >= 8) {
7838     p = (Prefix)( p | REX_R);
7839   }
7840   if (p != Prefix_EMPTY) {
7841     // do not generate an empty prefix
7842     prefix(p);
7843   }
7844 }
7845 
7846 void Assembler::prefix(Register dst, Address adr, Prefix p) {
7847   if (adr.base_needs_rex()) {
7848     if (adr.index_needs_rex()) {
7849       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7850     } else {
7851       prefix(REX_B);
7852     }
7853   } else {
7854     if (adr.index_needs_rex()) {
7855       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7856     }
7857   }
7858   if (dst->encoding() >= 8) {
7859     p = (Prefix)(p | REX_R);
7860   }
7861   if (p != Prefix_EMPTY) {
7862     // do not generate an empty prefix
7863     prefix(p);
7864   }
7865 }
7866 
7867 void Assembler::prefix(Address adr) {
7868   if (adr.base_needs_rex()) {
7869     if (adr.index_needs_rex()) {
7870       prefix(REX_XB);
7871     } else {
7872       prefix(REX_B);
7873     }
7874   } else {
7875     if (adr.index_needs_rex()) {
7876       prefix(REX_X);
7877     }
7878   }
7879 }
7880 
7881 void Assembler::prefixq(Address adr) {
7882   if (adr.base_needs_rex()) {
7883     if (adr.index_needs_rex()) {
7884       prefix(REX_WXB);
7885     } else {
7886       prefix(REX_WB);
7887     }
7888   } else {
7889     if (adr.index_needs_rex()) {
7890       prefix(REX_WX);
7891     } else {
7892       prefix(REX_W);
7893     }
7894   }
7895 }
7896 
7897 
7898 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
7899   if (reg->encoding() < 8) {
7900     if (adr.base_needs_rex()) {
7901       if (adr.index_needs_rex()) {
7902         prefix(REX_XB);
7903       } else {
7904         prefix(REX_B);
7905       }
7906     } else {
7907       if (adr.index_needs_rex()) {
7908         prefix(REX_X);
7909       } else if (byteinst && reg->encoding() >= 4 ) {
7910         prefix(REX);
7911       }
7912     }
7913   } else {
7914     if (adr.base_needs_rex()) {
7915       if (adr.index_needs_rex()) {
7916         prefix(REX_RXB);
7917       } else {
7918         prefix(REX_RB);
7919       }
7920     } else {
7921       if (adr.index_needs_rex()) {
7922         prefix(REX_RX);
7923       } else {
7924         prefix(REX_R);
7925       }
7926     }
7927   }
7928 }
7929 
7930 void Assembler::prefixq(Address adr, Register src) {
7931   if (src->encoding() < 8) {
7932     if (adr.base_needs_rex()) {
7933       if (adr.index_needs_rex()) {
7934         prefix(REX_WXB);
7935       } else {
7936         prefix(REX_WB);
7937       }
7938     } else {
7939       if (adr.index_needs_rex()) {
7940         prefix(REX_WX);
7941       } else {
7942         prefix(REX_W);
7943       }
7944     }
7945   } else {
7946     if (adr.base_needs_rex()) {
7947       if (adr.index_needs_rex()) {
7948         prefix(REX_WRXB);
7949       } else {
7950         prefix(REX_WRB);
7951       }
7952     } else {
7953       if (adr.index_needs_rex()) {
7954         prefix(REX_WRX);
7955       } else {
7956         prefix(REX_WR);
7957       }
7958     }
7959   }
7960 }
7961 
7962 void Assembler::prefix(Address adr, XMMRegister reg) {
7963   if (reg->encoding() < 8) {
7964     if (adr.base_needs_rex()) {
7965       if (adr.index_needs_rex()) {
7966         prefix(REX_XB);
7967       } else {
7968         prefix(REX_B);
7969       }
7970     } else {
7971       if (adr.index_needs_rex()) {
7972         prefix(REX_X);
7973       }
7974     }
7975   } else {
7976     if (adr.base_needs_rex()) {
7977       if (adr.index_needs_rex()) {
7978         prefix(REX_RXB);
7979       } else {
7980         prefix(REX_RB);
7981       }
7982     } else {
7983       if (adr.index_needs_rex()) {
7984         prefix(REX_RX);
7985       } else {
7986         prefix(REX_R);
7987       }
7988     }
7989   }
7990 }
7991 
7992 void Assembler::prefixq(Address adr, XMMRegister src) {
7993   if (src->encoding() < 8) {
7994     if (adr.base_needs_rex()) {
7995       if (adr.index_needs_rex()) {
7996         prefix(REX_WXB);
7997       } else {
7998         prefix(REX_WB);
7999       }
8000     } else {
8001       if (adr.index_needs_rex()) {
8002         prefix(REX_WX);
8003       } else {
8004         prefix(REX_W);
8005       }
8006     }
8007   } else {
8008     if (adr.base_needs_rex()) {
8009       if (adr.index_needs_rex()) {
8010         prefix(REX_WRXB);
8011       } else {
8012         prefix(REX_WRB);
8013       }
8014     } else {
8015       if (adr.index_needs_rex()) {
8016         prefix(REX_WRX);
8017       } else {
8018         prefix(REX_WR);
8019       }
8020     }
8021   }
8022 }
8023 
8024 void Assembler::adcq(Register dst, int32_t imm32) {
8025   (void) prefixq_and_encode(dst->encoding());
8026   emit_arith(0x81, 0xD0, dst, imm32);
8027 }
8028 
8029 void Assembler::adcq(Register dst, Address src) {
8030   InstructionMark im(this);
8031   prefixq(src, dst);
8032   emit_int8(0x13);
8033   emit_operand(dst, src);
8034 }
8035 
8036 void Assembler::adcq(Register dst, Register src) {
8037   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8038   emit_arith(0x13, 0xC0, dst, src);
8039 }
8040 
8041 void Assembler::addq(Address dst, int32_t imm32) {
8042   InstructionMark im(this);
8043   prefixq(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
8045 }
8046 
8047 void Assembler::addq(Address dst, Register src) {
8048   InstructionMark im(this);
8049   prefixq(dst, src);
8050   emit_int8(0x01);
8051   emit_operand(src, dst);
8052 }
8053 
8054 void Assembler::addq(Register dst, int32_t imm32) {
8055   (void) prefixq_and_encode(dst->encoding());
8056   emit_arith(0x81, 0xC0, dst, imm32);
8057 }
8058 
8059 void Assembler::addq(Register dst, Address src) {
8060   InstructionMark im(this);
8061   prefixq(src, dst);
8062   emit_int8(0x03);
8063   emit_operand(dst, src);
8064 }
8065 
8066 void Assembler::addq(Register dst, Register src) {
8067   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8068   emit_arith(0x03, 0xC0, dst, src);
8069 }
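
// Putting the helpers together (illustrative, not emitted here):
// addq(rax, rbx) produces REX.W (0x48), opcode 0x03 and ModRM 0xC3 -- the
// three-byte instruction 48 03 C3.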
8070 
8071 void Assembler::adcxq(Register dst, Register src) {
8072   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8073   emit_int8((unsigned char)0x66);
8074   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8075   emit_int8(0x0F);
8076   emit_int8(0x38);
8077   emit_int8((unsigned char)0xF6);
8078   emit_int8((unsigned char)(0xC0 | encode));
8079 }
8080 
8081 void Assembler::adoxq(Register dst, Register src) {
8082   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8083   emit_int8((unsigned char)0xF3);
8084   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8085   emit_int8(0x0F);
8086   emit_int8(0x38);
8087   emit_int8((unsigned char)0xF6);
8088   emit_int8((unsigned char)(0xC0 | encode));
8089 }
8090 
8091 void Assembler::andq(Address dst, int32_t imm32) {
8092   InstructionMark im(this);
8093   prefixq(dst);
8094   emit_int8((unsigned char)0x81);
8095   emit_operand(rsp, dst, 4);
8096   emit_int32(imm32);
8097 }
8098 
8099 void Assembler::andq(Register dst, int32_t imm32) {
8100   (void) prefixq_and_encode(dst->encoding());
8101   emit_arith(0x81, 0xE0, dst, imm32);
8102 }
8103 
8104 void Assembler::andq(Register dst, Address src) {
8105   InstructionMark im(this);
8106   prefixq(src, dst);
8107   emit_int8(0x23);
8108   emit_operand(dst, src);
8109 }
8110 
8111 void Assembler::andq(Register dst, Register src) {
8112   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8113   emit_arith(0x23, 0xC0, dst, src);
8114 }
8115 
8116 void Assembler::andnq(Register dst, Register src1, Register src2) {
8117   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8118   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8119   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8120   emit_int8((unsigned char)0xF2);
8121   emit_int8((unsigned char)(0xC0 | encode));
8122 }
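
// Operand note for andnq above: ANDN computes dst = ~src1 & src2, with src1
// carried in VEX.vvvv and src2 in ModRM.rm -- matching the argument order
// passed to vex_prefix_and_encode().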
8123 
8124 void Assembler::andnq(Register dst, Register src1, Address src2) {
8125   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8126   InstructionMark im(this);
8127   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8128   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8129   emit_int8((unsigned char)0xF2);
8130   emit_operand(dst, src2);
8131 }
8132 
8133 void Assembler::bsfq(Register dst, Register src) {
8134   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8135   emit_int8(0x0F);
8136   emit_int8((unsigned char)0xBC);
8137   emit_int8((unsigned char)(0xC0 | encode));
8138 }
8139 
8140 void Assembler::bsrq(Register dst, Register src) {
8141   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8142   emit_int8(0x0F);
8143   emit_int8((unsigned char)0xBD);
8144   emit_int8((unsigned char)(0xC0 | encode));
8145 }
8146 
8147 void Assembler::bswapq(Register reg) {
8148   int encode = prefixq_and_encode(reg->encoding());
8149   emit_int8(0x0F);
8150   emit_int8((unsigned char)(0xC8 | encode));
8151 }
8152 
8153 void Assembler::blsiq(Register dst, Register src) {
8154   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8155   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8156   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8157   emit_int8((unsigned char)0xF3);
8158   emit_int8((unsigned char)(0xC0 | encode));
8159 }
8160 
8161 void Assembler::blsiq(Register dst, Address src) {
8162   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8163   InstructionMark im(this);
8164   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8165   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8166   emit_int8((unsigned char)0xF3);
8167   emit_operand(rbx, src);
8168 }
8169 
8170 void Assembler::blsmskq(Register dst, Register src) {
8171   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8172   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8173   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8174   emit_int8((unsigned char)0xF3);
8175   emit_int8((unsigned char)(0xC0 | encode));
8176 }
8177 
8178 void Assembler::blsmskq(Register dst, Address src) {
8179   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8180   InstructionMark im(this);
8181   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8182   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8183   emit_int8((unsigned char)0xF3);
8184   emit_operand(rdx, src);
8185 }
8186 
8187 void Assembler::blsrq(Register dst, Register src) {
8188   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8189   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8190   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8191   emit_int8((unsigned char)0xF3);
8192   emit_int8((unsigned char)(0xC0 | encode));
8193 }
8194 
8195 void Assembler::blsrq(Register dst, Address src) {
8196   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8197   InstructionMark im(this);
8198   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8199   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8200   emit_int8((unsigned char)0xF3);
8201   emit_operand(rcx, src);
8202 }
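
// blsiq/blsmskq/blsrq above are VEX "group 17" instructions (opcode 0xF3):
// ModRM.reg is an opcode extension rather than a register, which is why fixed
// registers are passed in the reg position -- rbx (/3) selects BLSI, rdx (/2)
// BLSMSK and rcx (/1) BLSR -- while the real destination travels in VEX.vvvv.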
8203 
8204 void Assembler::cdqq() {
8205   prefix(REX_W);
8206   emit_int8((unsigned char)0x99);
8207 }
8208 
8209 void Assembler::clflush(Address adr) {
8210   prefix(adr);
8211   emit_int8(0x0F);
8212   emit_int8((unsigned char)0xAE);
8213   emit_operand(rdi, adr);
8214 }
8215 
8216 void Assembler::cmovq(Condition cc, Register dst, Register src) {
8217   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8218   emit_int8(0x0F);
8219   emit_int8(0x40 | cc);
8220   emit_int8((unsigned char)(0xC0 | encode));
8221 }
8222 
8223 void Assembler::cmovq(Condition cc, Register dst, Address src) {
8224   InstructionMark im(this);
8225   prefixq(src, dst);
8226   emit_int8(0x0F);
8227   emit_int8(0x40 | cc);
8228   emit_operand(dst, src);
8229 }
8230 
8231 void Assembler::cmpq(Address dst, int32_t imm32) {
8232   InstructionMark im(this);
8233   prefixq(dst);
8234   emit_int8((unsigned char)0x81);
8235   emit_operand(rdi, dst, 4);
8236   emit_int32(imm32);
8237 }
8238 
8239 void Assembler::cmpq(Register dst, int32_t imm32) {
8240   (void) prefixq_and_encode(dst->encoding());
8241   emit_arith(0x81, 0xF8, dst, imm32);
8242 }
8243 
void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  // 0x39 is CMP r/m64, r64, comparing the memory operand dst against src;
  // 0x3B would swap the operand roles and invert the sense of the comparison.
  emit_int8(0x39);
  emit_operand(src, dst);
}
8250 
8251 void Assembler::cmpq(Register dst, Register src) {
8252   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8253   emit_arith(0x3B, 0xC0, dst, src);
8254 }
8255 
8256 void Assembler::cmpq(Register dst, Address  src) {
8257   InstructionMark im(this);
8258   prefixq(src, dst);
8259   emit_int8(0x3B);
8260   emit_operand(dst, src);
8261 }
8262 
8263 void Assembler::cmpxchgq(Register reg, Address adr) {
8264   InstructionMark im(this);
8265   prefixq(adr, reg);
8266   emit_int8(0x0F);
8267   emit_int8((unsigned char)0xB1);
8268   emit_operand(reg, adr);
8269 }
8270 
8271 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
8272   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8273   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8274   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8275   emit_int8(0x2A);
8276   emit_int8((unsigned char)(0xC0 | encode));
8277 }
8278 
8279 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
8280   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8281   InstructionMark im(this);
8282   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8283   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8284   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8285   emit_int8(0x2A);
8286   emit_operand(dst, src);
8287 }
8288 
8289 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
8290   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8291   InstructionMark im(this);
8292   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8293   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8294   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8295   emit_int8(0x2A);
8296   emit_operand(dst, src);
8297 }
8298 
8299 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
8300   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8301   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8302   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8303   emit_int8(0x2C);
8304   emit_int8((unsigned char)(0xC0 | encode));
8305 }
8306 
8307 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
8308   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8309   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8310   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8311   emit_int8(0x2C);
8312   emit_int8((unsigned char)(0xC0 | encode));
8313 }
8314 
8315 void Assembler::decl(Register dst) {
8316   // Don't use it directly. Use MacroAssembler::decrementl() instead.
8317   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8318   int encode = prefix_and_encode(dst->encoding());
8319   emit_int8((unsigned char)0xFF);
8320   emit_int8((unsigned char)(0xC8 | encode));
8321 }
8322 
8323 void Assembler::decq(Register dst) {
8324   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8326   int encode = prefixq_and_encode(dst->encoding());
8327   emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
8329 }
8330 
8331 void Assembler::decq(Address dst) {
8332   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8333   InstructionMark im(this);
8334   prefixq(dst);
8335   emit_int8((unsigned char)0xFF);
8336   emit_operand(rcx, dst);
8337 }
8338 
8339 void Assembler::fxrstor(Address src) {
8340   prefixq(src);
8341   emit_int8(0x0F);
8342   emit_int8((unsigned char)0xAE);
8343   emit_operand(as_Register(1), src);
8344 }
8345 
8346 void Assembler::xrstor(Address src) {
8347   prefixq(src);
8348   emit_int8(0x0F);
8349   emit_int8((unsigned char)0xAE);
8350   emit_operand(as_Register(5), src);
8351 }
8352 
8353 void Assembler::fxsave(Address dst) {
8354   prefixq(dst);
8355   emit_int8(0x0F);
8356   emit_int8((unsigned char)0xAE);
8357   emit_operand(as_Register(0), dst);
8358 }
8359 
8360 void Assembler::xsave(Address dst) {
8361   prefixq(dst);
8362   emit_int8(0x0F);
8363   emit_int8((unsigned char)0xAE);
8364   emit_operand(as_Register(4), dst);
8365 }
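
// fxrstor/xrstor/fxsave/xsave (like clflush above) all share opcode 0F AE;
// the as_Register(N) argument supplies the /N ModRM.reg opcode extension:
// /0 FXSAVE, /1 FXRSTOR, /4 XSAVE, /5 XRSTOR (and /7 CLFLUSH).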
8366 
8367 void Assembler::idivq(Register src) {
8368   int encode = prefixq_and_encode(src->encoding());
8369   emit_int8((unsigned char)0xF7);
8370   emit_int8((unsigned char)(0xF8 | encode));
8371 }
8372 
8373 void Assembler::imulq(Register dst, Register src) {
8374   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8375   emit_int8(0x0F);
8376   emit_int8((unsigned char)0xAF);
8377   emit_int8((unsigned char)(0xC0 | encode));
8378 }
8379 
8380 void Assembler::imulq(Register dst, Register src, int value) {
8381   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8382   if (is8bit(value)) {
8383     emit_int8(0x6B);
8384     emit_int8((unsigned char)(0xC0 | encode));
8385     emit_int8(value & 0xFF);
8386   } else {
8387     emit_int8(0x69);
8388     emit_int8((unsigned char)(0xC0 | encode));
8389     emit_int32(value);
8390   }
8391 }
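
// Illustrative encodings for the two immediate paths above:
// imulq(rax, rbx, 16) fits in 8 bits and emits 48 6B C3 10, while
// imulq(rax, rbx, 4096) emits 48 69 C3 00 10 00 00.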
8392 
8393 void Assembler::imulq(Register dst, Address src) {
8394   InstructionMark im(this);
8395   prefixq(src, dst);
8396   emit_int8(0x0F);
8397   emit_int8((unsigned char) 0xAF);
8398   emit_operand(dst, src);
8399 }
8400 
8401 void Assembler::incl(Register dst) {
8402   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8404   int encode = prefix_and_encode(dst->encoding());
8405   emit_int8((unsigned char)0xFF);
8406   emit_int8((unsigned char)(0xC0 | encode));
8407 }
8408 
8409 void Assembler::incq(Register dst) {
8410   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8412   int encode = prefixq_and_encode(dst->encoding());
8413   emit_int8((unsigned char)0xFF);
8414   emit_int8((unsigned char)(0xC0 | encode));
8415 }
8416 
8417 void Assembler::incq(Address dst) {
8418   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8419   InstructionMark im(this);
8420   prefixq(dst);
8421   emit_int8((unsigned char)0xFF);
8422   emit_operand(rax, dst);
8423 }
8424 
8425 void Assembler::lea(Register dst, Address src) {
8426   leaq(dst, src);
8427 }
8428 
8429 void Assembler::leaq(Register dst, Address src) {
8430   InstructionMark im(this);
8431   prefixq(src, dst);
8432   emit_int8((unsigned char)0x8D);
8433   emit_operand(dst, src);
8434 }
8435 
8436 void Assembler::mov64(Register dst, int64_t imm64) {
8437   InstructionMark im(this);
8438   int encode = prefixq_and_encode(dst->encoding());
8439   emit_int8((unsigned char)(0xB8 | encode));
8440   emit_int64(imm64);
8441 }
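
// mov64 is the MOVABS form: e.g. (illustrative) mov64(rax, imm) emits
// 48 B8 followed by the eight immediate bytes, little-endian.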
8442 
8443 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
8444   InstructionMark im(this);
8445   int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
8447   emit_data64(imm64, rspec);
8448 }
8449 
8450 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
8451   InstructionMark im(this);
8452   int encode = prefix_and_encode(dst->encoding());
8453   emit_int8((unsigned char)(0xB8 | encode));
8454   emit_data((int)imm32, rspec, narrow_oop_operand);
8455 }
8456 
8457 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
8458   InstructionMark im(this);
8459   prefix(dst);
8460   emit_int8((unsigned char)0xC7);
8461   emit_operand(rax, dst, 4);
8462   emit_data((int)imm32, rspec, narrow_oop_operand);
8463 }
8464 
8465 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
8466   InstructionMark im(this);
8467   int encode = prefix_and_encode(src1->encoding());
8468   emit_int8((unsigned char)0x81);
8469   emit_int8((unsigned char)(0xF8 | encode));
8470   emit_data((int)imm32, rspec, narrow_oop_operand);
8471 }
8472 
8473 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
8474   InstructionMark im(this);
8475   prefix(src1);
8476   emit_int8((unsigned char)0x81);
8477   emit_operand(rax, src1, 4);
8478   emit_data((int)imm32, rspec, narrow_oop_operand);
8479 }
8480 
8481 void Assembler::lzcntq(Register dst, Register src) {
8482   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
8483   emit_int8((unsigned char)0xF3);
8484   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8485   emit_int8(0x0F);
8486   emit_int8((unsigned char)0xBD);
8487   emit_int8((unsigned char)(0xC0 | encode));
8488 }
8489 
8490 void Assembler::movdq(XMMRegister dst, Register src) {
8491   // table D-1 says MMX/SSE2
8492   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8493   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8494   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8495   emit_int8(0x6E);
8496   emit_int8((unsigned char)(0xC0 | encode));
8497 }
8498 
8499 void Assembler::movdq(Register dst, XMMRegister src) {
8500   // table D-1 says MMX/SSE2
8501   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8502   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8503   // swap src/dst to get correct prefix
8504   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8505   emit_int8(0x7E);
8506   emit_int8((unsigned char)(0xC0 | encode));
8507 }
8508 
8509 void Assembler::movq(Register dst, Register src) {
8510   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8511   emit_int8((unsigned char)0x8B);
8512   emit_int8((unsigned char)(0xC0 | encode));
8513 }
8514 
8515 void Assembler::movq(Register dst, Address src) {
8516   InstructionMark im(this);
8517   prefixq(src, dst);
8518   emit_int8((unsigned char)0x8B);
8519   emit_operand(dst, src);
8520 }
8521 
8522 void Assembler::movq(Address dst, Register src) {
8523   InstructionMark im(this);
8524   prefixq(dst, src);
8525   emit_int8((unsigned char)0x89);
8526   emit_operand(src, dst);
8527 }
8528 
8529 void Assembler::movsbq(Register dst, Address src) {
8530   InstructionMark im(this);
8531   prefixq(src, dst);
8532   emit_int8(0x0F);
8533   emit_int8((unsigned char)0xBE);
8534   emit_operand(dst, src);
8535 }
8536 
8537 void Assembler::movsbq(Register dst, Register src) {
8538   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8539   emit_int8(0x0F);
8540   emit_int8((unsigned char)0xBE);
8541   emit_int8((unsigned char)(0xC0 | encode));
8542 }
8543 
8544 void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3) as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use this form until it has been tested at runtime...
8548   ShouldNotReachHere();
8549   InstructionMark im(this);
8550   int encode = prefixq_and_encode(dst->encoding());
8551   emit_int8((unsigned char)(0xC7 | encode));
8552   emit_int32(imm32);
8553 }
8554 
8555 void Assembler::movslq(Address dst, int32_t imm32) {
8556   assert(is_simm32(imm32), "lost bits");
8557   InstructionMark im(this);
8558   prefixq(dst);
8559   emit_int8((unsigned char)0xC7);
8560   emit_operand(rax, dst, 4);
8561   emit_int32(imm32);
8562 }
8563 
8564 void Assembler::movslq(Register dst, Address src) {
8565   InstructionMark im(this);
8566   prefixq(src, dst);
8567   emit_int8(0x63);
8568   emit_operand(dst, src);
8569 }
8570 
8571 void Assembler::movslq(Register dst, Register src) {
8572   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8573   emit_int8(0x63);
8574   emit_int8((unsigned char)(0xC0 | encode));
8575 }
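
// movslq is MOVSXD (opcode 63 /r): it sign-extends the 32-bit source into a
// 64-bit register, e.g. (illustrative) movslq(rax, rbx) emits 48 63 C3.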
8576 
8577 void Assembler::movswq(Register dst, Address src) {
8578   InstructionMark im(this);
8579   prefixq(src, dst);
8580   emit_int8(0x0F);
8581   emit_int8((unsigned char)0xBF);
8582   emit_operand(dst, src);
8583 }
8584 
8585 void Assembler::movswq(Register dst, Register src) {
8586   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8587   emit_int8((unsigned char)0x0F);
8588   emit_int8((unsigned char)0xBF);
8589   emit_int8((unsigned char)(0xC0 | encode));
8590 }
8591 
8592 void Assembler::movzbq(Register dst, Address src) {
8593   InstructionMark im(this);
8594   prefixq(src, dst);
8595   emit_int8((unsigned char)0x0F);
8596   emit_int8((unsigned char)0xB6);
8597   emit_operand(dst, src);
8598 }
8599 
8600 void Assembler::movzbq(Register dst, Register src) {
8601   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8602   emit_int8(0x0F);
8603   emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
8605 }
8606 
8607 void Assembler::movzwq(Register dst, Address src) {
8608   InstructionMark im(this);
8609   prefixq(src, dst);
8610   emit_int8((unsigned char)0x0F);
8611   emit_int8((unsigned char)0xB7);
8612   emit_operand(dst, src);
8613 }
8614 
8615 void Assembler::movzwq(Register dst, Register src) {
8616   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8617   emit_int8((unsigned char)0x0F);
8618   emit_int8((unsigned char)0xB7);
8619   emit_int8((unsigned char)(0xC0 | encode));
8620 }
8621 
8622 void Assembler::mulq(Address src) {
8623   InstructionMark im(this);
8624   prefixq(src);
8625   emit_int8((unsigned char)0xF7);
8626   emit_operand(rsp, src);
8627 }
8628 
8629 void Assembler::mulq(Register src) {
8630   int encode = prefixq_and_encode(src->encoding());
8631   emit_int8((unsigned char)0xF7);
8632   emit_int8((unsigned char)(0xE0 | encode));
8633 }
8634 
8635 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
8636   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8637   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8638   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
8639   emit_int8((unsigned char)0xF6);
8640   emit_int8((unsigned char)(0xC0 | encode));
8641 }
8642 
8643 void Assembler::negq(Register dst) {
8644   int encode = prefixq_and_encode(dst->encoding());
8645   emit_int8((unsigned char)0xF7);
8646   emit_int8((unsigned char)(0xD8 | encode));
8647 }
8648 
8649 void Assembler::notq(Register dst) {
8650   int encode = prefixq_and_encode(dst->encoding());
8651   emit_int8((unsigned char)0xF7);
8652   emit_int8((unsigned char)(0xD0 | encode));
8653 }
8654 
8655 void Assembler::orq(Address dst, int32_t imm32) {
8656   InstructionMark im(this);
8657   prefixq(dst);
8658   emit_int8((unsigned char)0x81);
8659   emit_operand(rcx, dst, 4);
8660   emit_int32(imm32);
8661 }
8662 
8663 void Assembler::orq(Register dst, int32_t imm32) {
8664   (void) prefixq_and_encode(dst->encoding());
8665   emit_arith(0x81, 0xC8, dst, imm32);
8666 }
8667 
8668 void Assembler::orq(Register dst, Address src) {
8669   InstructionMark im(this);
8670   prefixq(src, dst);
8671   emit_int8(0x0B);
8672   emit_operand(dst, src);
8673 }
8674 
8675 void Assembler::orq(Register dst, Register src) {
8676   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8677   emit_arith(0x0B, 0xC0, dst, src);
8678 }
8679 
8680 void Assembler::popa() { // 64bit
8681   movq(r15, Address(rsp, 0));
8682   movq(r14, Address(rsp, wordSize));
8683   movq(r13, Address(rsp, 2 * wordSize));
8684   movq(r12, Address(rsp, 3 * wordSize));
8685   movq(r11, Address(rsp, 4 * wordSize));
8686   movq(r10, Address(rsp, 5 * wordSize));
8687   movq(r9,  Address(rsp, 6 * wordSize));
8688   movq(r8,  Address(rsp, 7 * wordSize));
8689   movq(rdi, Address(rsp, 8 * wordSize));
8690   movq(rsi, Address(rsp, 9 * wordSize));
8691   movq(rbp, Address(rsp, 10 * wordSize));
8692   // skip rsp
8693   movq(rbx, Address(rsp, 12 * wordSize));
8694   movq(rdx, Address(rsp, 13 * wordSize));
8695   movq(rcx, Address(rsp, 14 * wordSize));
8696   movq(rax, Address(rsp, 15 * wordSize));
8697 
8698   addq(rsp, 16 * wordSize);
8699 }
8700 
8701 void Assembler::popcntq(Register dst, Address src) {
8702   assert(VM_Version::supports_popcnt(), "must support");
8703   InstructionMark im(this);
8704   emit_int8((unsigned char)0xF3);
8705   prefixq(src, dst);
8706   emit_int8((unsigned char)0x0F);
8707   emit_int8((unsigned char)0xB8);
8708   emit_operand(dst, src);
8709 }
8710 
8711 void Assembler::popcntq(Register dst, Register src) {
8712   assert(VM_Version::supports_popcnt(), "must support");
8713   emit_int8((unsigned char)0xF3);
8714   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8715   emit_int8((unsigned char)0x0F);
8716   emit_int8((unsigned char)0xB8);
8717   emit_int8((unsigned char)(0xC0 | encode));
8718 }
8719 
8720 void Assembler::popq(Address dst) {
8721   InstructionMark im(this);
8722   prefixq(dst);
8723   emit_int8((unsigned char)0x8F);
8724   emit_operand(rax, dst);
8725 }
8726 
8727 void Assembler::pusha() { // 64bit
  // We have to store the original rsp.  The ABI says that the 128 bytes
  // below rsp (the red zone) are local scratch.
  movq(Address(rsp, -5 * wordSize), rsp);
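  // (An offset of -5 * wordSize from the current rsp becomes 11 * wordSize
  // from the post-subq rsp below: the "skip rsp" slot that popa leaves alone.)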
8731 
8732   subq(rsp, 16 * wordSize);
8733 
8734   movq(Address(rsp, 15 * wordSize), rax);
8735   movq(Address(rsp, 14 * wordSize), rcx);
8736   movq(Address(rsp, 13 * wordSize), rdx);
8737   movq(Address(rsp, 12 * wordSize), rbx);
8738   // skip rsp
8739   movq(Address(rsp, 10 * wordSize), rbp);
8740   movq(Address(rsp, 9 * wordSize), rsi);
8741   movq(Address(rsp, 8 * wordSize), rdi);
8742   movq(Address(rsp, 7 * wordSize), r8);
8743   movq(Address(rsp, 6 * wordSize), r9);
8744   movq(Address(rsp, 5 * wordSize), r10);
8745   movq(Address(rsp, 4 * wordSize), r11);
8746   movq(Address(rsp, 3 * wordSize), r12);
8747   movq(Address(rsp, 2 * wordSize), r13);
8748   movq(Address(rsp, wordSize), r14);
8749   movq(Address(rsp, 0), r15);
8750 }
8751 
8752 void Assembler::pushq(Address src) {
8753   InstructionMark im(this);
8754   prefixq(src);
8755   emit_int8((unsigned char)0xFF);
8756   emit_operand(rsi, src);
8757 }
8758 
8759 void Assembler::rclq(Register dst, int imm8) {
8760   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8761   int encode = prefixq_and_encode(dst->encoding());
8762   if (imm8 == 1) {
8763     emit_int8((unsigned char)0xD1);
8764     emit_int8((unsigned char)(0xD0 | encode));
8765   } else {
8766     emit_int8((unsigned char)0xC1);
8767     emit_int8((unsigned char)(0xD0 | encode));
8768     emit_int8(imm8);
8769   }
8770 }
8771 
8772 void Assembler::rcrq(Register dst, int imm8) {
8773   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8774   int encode = prefixq_and_encode(dst->encoding());
8775   if (imm8 == 1) {
8776     emit_int8((unsigned char)0xD1);
8777     emit_int8((unsigned char)(0xD8 | encode));
8778   } else {
8779     emit_int8((unsigned char)0xC1);
8780     emit_int8((unsigned char)(0xD8 | encode));
8781     emit_int8(imm8);
8782   }
8783 }
8784 
8785 void Assembler::rorq(Register dst, int imm8) {
8786   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8787   int encode = prefixq_and_encode(dst->encoding());
8788   if (imm8 == 1) {
8789     emit_int8((unsigned char)0xD1);
8790     emit_int8((unsigned char)(0xC8 | encode));
8791   } else {
8792     emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xC8 | encode));
8794     emit_int8(imm8);
8795   }
8796 }
8797 
8798 void Assembler::rorxq(Register dst, Register src, int imm8) {
8799   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8800   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8801   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8802   emit_int8((unsigned char)0xF0);
8803   emit_int8((unsigned char)(0xC0 | encode));
8804   emit_int8(imm8);
8805 }
8806 
8807 void Assembler::rorxd(Register dst, Register src, int imm8) {
8808   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8809   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8810   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8811   emit_int8((unsigned char)0xF0);
8812   emit_int8((unsigned char)(0xC0 | encode));
8813   emit_int8(imm8);
8814 }
8815 
8816 void Assembler::sarq(Register dst, int imm8) {
8817   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8818   int encode = prefixq_and_encode(dst->encoding());
8819   if (imm8 == 1) {
8820     emit_int8((unsigned char)0xD1);
8821     emit_int8((unsigned char)(0xF8 | encode));
8822   } else {
8823     emit_int8((unsigned char)0xC1);
8824     emit_int8((unsigned char)(0xF8 | encode));
8825     emit_int8(imm8);
8826   }
8827 }
8828 
8829 void Assembler::sarq(Register dst) {
8830   int encode = prefixq_and_encode(dst->encoding());
8831   emit_int8((unsigned char)0xD3);
8832   emit_int8((unsigned char)(0xF8 | encode));
8833 }
8834 
8835 void Assembler::sbbq(Address dst, int32_t imm32) {
8836   InstructionMark im(this);
8837   prefixq(dst);
8838   emit_arith_operand(0x81, rbx, dst, imm32);
8839 }
8840 
8841 void Assembler::sbbq(Register dst, int32_t imm32) {
8842   (void) prefixq_and_encode(dst->encoding());
8843   emit_arith(0x81, 0xD8, dst, imm32);
8844 }
8845 
8846 void Assembler::sbbq(Register dst, Address src) {
8847   InstructionMark im(this);
8848   prefixq(src, dst);
8849   emit_int8(0x1B);
8850   emit_operand(dst, src);
8851 }
8852 
8853 void Assembler::sbbq(Register dst, Register src) {
8854   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8855   emit_arith(0x1B, 0xC0, dst, src);
8856 }
8857 
8858 void Assembler::shlq(Register dst, int imm8) {
8859   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8860   int encode = prefixq_and_encode(dst->encoding());
8861   if (imm8 == 1) {
8862     emit_int8((unsigned char)0xD1);
8863     emit_int8((unsigned char)(0xE0 | encode));
8864   } else {
8865     emit_int8((unsigned char)0xC1);
8866     emit_int8((unsigned char)(0xE0 | encode));
8867     emit_int8(imm8);
8868   }
8869 }
8870 
8871 void Assembler::shlq(Register dst) {
8872   int encode = prefixq_and_encode(dst->encoding());
8873   emit_int8((unsigned char)0xD3);
8874   emit_int8((unsigned char)(0xE0 | encode));
8875 }
8876 
8877 void Assembler::shrq(Register dst, int imm8) {
8878   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8879   int encode = prefixq_and_encode(dst->encoding());
8880   emit_int8((unsigned char)0xC1);
8881   emit_int8((unsigned char)(0xE8 | encode));
8882   emit_int8(imm8);
8883 }
8884 
8885 void Assembler::shrq(Register dst) {
8886   int encode = prefixq_and_encode(dst->encoding());
8887   emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
8889 }
8890 
8891 void Assembler::subq(Address dst, int32_t imm32) {
8892   InstructionMark im(this);
8893   prefixq(dst);
8894   emit_arith_operand(0x81, rbp, dst, imm32);
8895 }
8896 
8897 void Assembler::subq(Address dst, Register src) {
8898   InstructionMark im(this);
8899   prefixq(dst, src);
8900   emit_int8(0x29);
8901   emit_operand(src, dst);
8902 }
8903 
8904 void Assembler::subq(Register dst, int32_t imm32) {
8905   (void) prefixq_and_encode(dst->encoding());
8906   emit_arith(0x81, 0xE8, dst, imm32);
8907 }
8908 
// Force generation of a 4-byte immediate value even if it fits into 8 bits
8910 void Assembler::subq_imm32(Register dst, int32_t imm32) {
8911   (void) prefixq_and_encode(dst->encoding());
8912   emit_arith_imm32(0x81, 0xE8, dst, imm32);
8913 }
8914 
8915 void Assembler::subq(Register dst, Address src) {
8916   InstructionMark im(this);
8917   prefixq(src, dst);
8918   emit_int8(0x2B);
8919   emit_operand(dst, src);
8920 }
8921 
8922 void Assembler::subq(Register dst, Register src) {
8923   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8924   emit_arith(0x2B, 0xC0, dst, src);
8925 }
8926 
8927 void Assembler::testq(Register dst, int32_t imm32) {
8928   // not using emit_arith because test
8929   // doesn't support sign-extension of
8930   // 8bit operands
8931   int encode = dst->encoding();
8932   if (encode == 0) {
8933     prefix(REX_W);
8934     emit_int8((unsigned char)0xA9);
8935   } else {
8936     encode = prefixq_and_encode(encode);
8937     emit_int8((unsigned char)0xF7);
8938     emit_int8((unsigned char)(0xC0 | encode));
8939   }
8940   emit_int32(imm32);
8941 }
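
// The encode == 0 special case above uses the one-byte-shorter TEST form
// reserved for rax: e.g. (illustrative) testq(rax, 0x100) emits
// 48 A9 00 01 00 00, versus the generic 48 F7 /0 form
// (48 F7 C3 00 01 00 00 for rbx).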
8942 
8943 void Assembler::testq(Register dst, Register src) {
8944   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8945   emit_arith(0x85, 0xC0, dst, src);
8946 }
8947 
8948 void Assembler::xaddq(Address dst, Register src) {
8949   InstructionMark im(this);
8950   prefixq(dst, src);
8951   emit_int8(0x0F);
8952   emit_int8((unsigned char)0xC1);
8953   emit_operand(src, dst);
8954 }
8955 
8956 void Assembler::xchgq(Register dst, Address src) {
8957   InstructionMark im(this);
8958   prefixq(src, dst);
8959   emit_int8((unsigned char)0x87);
8960   emit_operand(dst, src);
8961 }
8962 
8963 void Assembler::xchgq(Register dst, Register src) {
8964   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8965   emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
8967 }
8968 
8969 void Assembler::xorq(Register dst, Register src) {
8970   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8971   emit_arith(0x33, 0xC0, dst, src);
8972 }
8973 
8974 void Assembler::xorq(Register dst, Address src) {
8975   InstructionMark im(this);
8976   prefixq(src, dst);
8977   emit_int8(0x33);
8978   emit_operand(dst, src);
8979 }
8980 
8981 #endif // !LP64