/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX-enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
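
// Reading the table (illustrative): the row is selected by tuple type plus a
// mod index derived from the instruction attributes, and the column by vector
// length.  An EVEX_T1S operand with 64-bit input size selects row EVEX_T1S(3),
// so the disp8 compression factor is 8 at every vector length; an EVEX_FV
// operand without Evex.b selects row EVEX_FV(0), so the factor is the full
// vector width (16/32/64 bytes for 128/256/512-bit operations).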

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64


// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

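// Strip the high (REX-extension) bit from a register encoding: r8-r15 encode
// as 8-15, but only the low three bits fit in the ModRM/SIB fields; bit 3
// travels separately in the REX prefix (REX.B/X/R).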
static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}

// Force generation of a 4-byte immediate value even if it fits into 8 bits
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}

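// Register-register arithmetic: op2 carries the ModRM mode bits (11b) and the
// two register fields are packed in below it. Worked example (illustrative):
// emit_arith(0x03, 0xC0, rbx, rcx) emits 0x03 0xD9, i.e. addl rbx, rcx
// (ModRM 0xD9 = 11 011 001: mod=11, reg=rbx(3), rm=rcx(1)).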
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}

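// Worked example (illustrative): for an EVEX_T1S instruction with a 64-bit
// input size (disp8 factor 8, see tuple_table above), a displacement of 64 is
// a multiple of 8 and 64/8 = 8 fits in a signed byte, so it is emitted as the
// compressed disp8 value 8; a displacement of 60 is not a multiple of 8, so
// the full disp32 form must be used.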
bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
    int evex_encoding = _attributes->get_evex_encoding();
    int tuple_type = _attributes->get_tuple_type();
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_attributes->get_input_size()) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    int vector_len = _attributes->get_vector_len();
    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}

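// ModRM/SIB encoding sketch (illustrative): with reg = rdx, base = rax,
// index = rcx, scale = times_4 and disp = 0x10, the code below emits
// 0x54 (ModRM: mod=01, reg=rdx, rm=100 -> SIB follows), 0x88 (SIB: ss=10,
// index=rcx, base=rax), then the disp8 byte 0x10.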
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    if (xreg_enc > 15) {
      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
      return;
    }
  }
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x58: // addpd
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
    case 0xFE: // paddd
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those have prefix 0x0F and are handled when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte since a
    // ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits, the REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in the product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x22: // pinsrd r, r/a, #8
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

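// Worked example (illustrative): for the three-byte instruction 83 C0 08
// (addl rax, 8), the decoder above sees opcode 0x83 (tail_size = 1), then a
// register-direct ModRM byte 0xC0 (no SIB, no displacement), and for
// end_pc_operand returns ip + tail_size, i.e. the address just past the imm8.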
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

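// x87 stack ops encode the stack slot directly in the second opcode byte.
// Worked example (illustrative): emit_farith(0xD8, 0xC0, 1) emits 0xD8 0xC1,
// i.e. fadd st, st(1).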
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}

void Assembler::addw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bit offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bit offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bit offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bit offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andnl(Register dst, Register src1, Address src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  // Entry is NULL in case of a scratch emit.
  assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}

void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored into adr, otherwise the value at adr is
// loaded into rax.
// The ZF is set if the compared values were equal, and cleared otherwise.
1594 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1595   InstructionMark im(this);
1596   prefix(adr, reg);
1597   emit_int8(0x0F);
1598   emit_int8((unsigned char)0xB1);
1599   emit_operand(reg, adr);
1600 }
1601 
// The 8-bit cmpxchg compares the value at adr with the contents of al (the
// low byte of rax); if they are equal, reg is stored into adr, otherwise the
// value at adr is loaded into al.
// The ZF is set if the compared values were equal, and cleared otherwise.
1605 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1606   InstructionMark im(this);
1607   prefix(adr, reg, true);
1608   emit_int8(0x0F);
1609   emit_int8((unsigned char)0xB0);
1610   emit_operand(reg, adr);
1611 }
1612 
1613 void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely, ucomisd comes out correct
1616   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1617   InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1619   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1620   attributes.set_rex_vex_w_reverted();
1621   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1622   emit_int8(0x2F);
1623   emit_operand(dst, src);
1624 }
1625 
1626 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1627   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1628   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1629   attributes.set_rex_vex_w_reverted();
1630   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1631   emit_int8(0x2F);
1632   emit_int8((unsigned char)(0xC0 | encode));
1633 }
1634 
1635 void Assembler::comiss(XMMRegister dst, Address src) {
1636   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1637   InstructionMark im(this);
1638   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1639   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1640   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1641   emit_int8(0x2F);
1642   emit_operand(dst, src);
1643 }
1644 
1645 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1646   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1647   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1648   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1649   emit_int8(0x2F);
1650   emit_int8((unsigned char)(0xC0 | encode));
1651 }
1652 
1653 void Assembler::cpuid() {
1654   emit_int8(0x0F);
1655   emit_int8((unsigned char)0xA2);
1656 }
1657 
// Opcode / Instruction                       Op/En   64-Bit Mode   Compat/Leg Mode   Description                   Implemented
// F2 0F 38 F0 / r       CRC32 r32, r / m8    RM      Valid         Valid             Accumulate CRC32 on r / m8.   v
// F2 REX 0F 38 F0 / r   CRC32 r32, r / m8*   RM      Valid         N.E.              Accumulate CRC32 on r / m8.   -
// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8    RM      Valid         N.E.              Accumulate CRC32 on r / m8.   -
//
// F2 0F 38 F1 / r       CRC32 r32, r / m16   RM      Valid         Valid             Accumulate CRC32 on r / m16.  v
//
// F2 0F 38 F1 / r       CRC32 r32, r / m32   RM      Valid         Valid             Accumulate CRC32 on r / m32.  v
//
// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64   RM      Valid         N.E.              Accumulate CRC32 on r / m64.  v
1668 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1669   assert(VM_Version::supports_sse4_2(), "");
1670   int8_t w = 0x01;
1671   Prefix p = Prefix_EMPTY;
1672 
1673   emit_int8((int8_t)0xF2);
1674   switch (sizeInBytes) {
1675   case 1:
1676     w = 0;
1677     break;
1678   case 2:
1679   case 4:
1680     break;
1681   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode.
    // Note:
    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
    //
    // Page B-72, Vol. 2C says
    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
    //                                                                            F0, contradicting Vol. 2A below!
    // while page 3-208, Vol. 2A says
    // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.  Accumulate CRC32 on r / m64.
    //
    // The 0 in the last bit is reserved for a different flavor of this instruction:
    // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.  Accumulate CRC32 on r / m8.
1695     p = REX_W;
1696     break;
1697   default:
1698     assert(0, "Unsupported value for a sizeInBytes argument");
1699     break;
1700   }
1701   LP64_ONLY(prefix(crc, v, p);)
1702   emit_int8((int8_t)0x0F);
1703   emit_int8(0x38);
1704   emit_int8((int8_t)(0xF0 | w));
  emit_int8((unsigned char)(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7)));
1706 }
1707 
1708 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1709   assert(VM_Version::supports_sse4_2(), "");
1710   InstructionMark im(this);
1711   int8_t w = 0x01;
1712   Prefix p = Prefix_EMPTY;
1713 
1714   emit_int8((int8_t)0xF2);
1715   switch (sizeInBytes) {
1716   case 1:
1717     w = 0;
1718     break;
1719   case 2:
1720   case 4:
1721     break;
1722   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode.
1724     p = REX_W;
1725     break;
1726   default:
1727     assert(0, "Unsupported value for a sizeInBytes argument");
1728     break;
1729   }
1730   LP64_ONLY(prefix(crc, adr, p);)
1731   emit_int8((int8_t)0x0F);
1732   emit_int8(0x38);
1733   emit_int8((int8_t)(0xF0 | w));
1734   emit_operand(crc, adr);
1735 }
1736 
1737 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1738   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1739   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1740   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1741   emit_int8((unsigned char)0xE6);
1742   emit_int8((unsigned char)(0xC0 | encode));
1743 }
1744 
1745 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1746   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1747   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1748   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1749   emit_int8(0x5B);
1750   emit_int8((unsigned char)(0xC0 | encode));
1751 }
1752 
1753 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1754   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1755   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1756   attributes.set_rex_vex_w_reverted();
1757   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1758   emit_int8(0x5A);
1759   emit_int8((unsigned char)(0xC0 | encode));
1760 }
1761 
1762 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1763   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1764   InstructionMark im(this);
1765   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1766   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1767   attributes.set_rex_vex_w_reverted();
1768   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1769   emit_int8(0x5A);
1770   emit_operand(dst, src);
1771 }
1772 
1773 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1774   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1775   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1776   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1777   emit_int8(0x2A);
1778   emit_int8((unsigned char)(0xC0 | encode));
1779 }
1780 
1781 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1782   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1783   InstructionMark im(this);
1784   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1785   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1786   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1787   emit_int8(0x2A);
1788   emit_operand(dst, src);
1789 }
1790 
1791 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1792   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1793   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1794   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1795   emit_int8(0x2A);
1796   emit_int8((unsigned char)(0xC0 | encode));
1797 }
1798 
1799 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1800   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1801   InstructionMark im(this);
1802   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1803   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1804   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1805   emit_int8(0x2A);
1806   emit_operand(dst, src);
1807 }
1808 
1809 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1810   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1811   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1812   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1813   emit_int8(0x2A);
1814   emit_int8((unsigned char)(0xC0 | encode));
1815 }
1816 
1817 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1818   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1819   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1820   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1821   emit_int8(0x5A);
1822   emit_int8((unsigned char)(0xC0 | encode));
1823 }
1824 
1825 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1826   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1827   InstructionMark im(this);
1828   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1829   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1830   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1831   emit_int8(0x5A);
1832   emit_operand(dst, src);
1833 }
1834 
1835 
1836 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1837   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1838   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1839   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1840   emit_int8(0x2C);
1841   emit_int8((unsigned char)(0xC0 | encode));
1842 }
1843 
1844 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1845   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1846   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1847   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1848   emit_int8(0x2C);
1849   emit_int8((unsigned char)(0xC0 | encode));
1850 }
1851 
1852 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1853   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1854   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1855   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1856   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1857   emit_int8((unsigned char)0xE6);
1858   emit_int8((unsigned char)(0xC0 | encode));
1859 }
1860 
1861 void Assembler::decl(Address dst) {
1862   // Don't use it directly. Use MacroAssembler::decrement() instead.
1863   InstructionMark im(this);
1864   prefix(dst);
1865   emit_int8((unsigned char)0xFF);
1866   emit_operand(rcx, dst);
1867 }
1868 
1869 void Assembler::divsd(XMMRegister dst, Address src) {
1870   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1871   InstructionMark im(this);
1872   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1873   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1874   attributes.set_rex_vex_w_reverted();
1875   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1876   emit_int8(0x5E);
1877   emit_operand(dst, src);
1878 }
1879 
1880 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1881   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1882   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1883   attributes.set_rex_vex_w_reverted();
1884   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1885   emit_int8(0x5E);
1886   emit_int8((unsigned char)(0xC0 | encode));
1887 }
1888 
1889 void Assembler::divss(XMMRegister dst, Address src) {
1890   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1891   InstructionMark im(this);
1892   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1893   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1894   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1895   emit_int8(0x5E);
1896   emit_operand(dst, src);
1897 }
1898 
1899 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1900   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1901   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1902   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1903   emit_int8(0x5E);
1904   emit_int8((unsigned char)(0xC0 | encode));
1905 }
1906 
1907 void Assembler::emms() {
1908   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1909   emit_int8(0x0F);
1910   emit_int8(0x77);
1911 }
1912 
1913 void Assembler::hlt() {
1914   emit_int8((unsigned char)0xF4);
1915 }
1916 
1917 void Assembler::idivl(Register src) {
1918   int encode = prefix_and_encode(src->encoding());
1919   emit_int8((unsigned char)0xF7);
1920   emit_int8((unsigned char)(0xF8 | encode));
1921 }
1922 
1923 void Assembler::divl(Register src) { // Unsigned
1924   int encode = prefix_and_encode(src->encoding());
1925   emit_int8((unsigned char)0xF7);
1926   emit_int8((unsigned char)(0xF0 | encode));
1927 }
1928 
1929 void Assembler::imull(Register src) {
1930   int encode = prefix_and_encode(src->encoding());
1931   emit_int8((unsigned char)0xF7);
1932   emit_int8((unsigned char)(0xE8 | encode));
1933 }
1934 
1935 void Assembler::imull(Register dst, Register src) {
1936   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1937   emit_int8(0x0F);
1938   emit_int8((unsigned char)0xAF);
1939   emit_int8((unsigned char)(0xC0 | encode));
1940 }
1941 
1942 
1943 void Assembler::imull(Register dst, Register src, int value) {
1944   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1945   if (is8bit(value)) {
1946     emit_int8(0x6B);
1947     emit_int8((unsigned char)(0xC0 | encode));
1948     emit_int8(value & 0xFF);
1949   } else {
1950     emit_int8(0x69);
1951     emit_int8((unsigned char)(0xC0 | encode));
1952     emit_int32(value);
1953   }
1954 }
1955 
1956 void Assembler::imull(Register dst, Address src) {
1957   InstructionMark im(this);
1958   prefix(src, dst);
1959   emit_int8(0x0F);
1960   emit_int8((unsigned char) 0xAF);
1961   emit_operand(dst, src);
1962 }
1963 
1964 
1965 void Assembler::incl(Address dst) {
1966   // Don't use it directly. Use MacroAssembler::increment() instead.
1967   InstructionMark im(this);
1968   prefix(dst);
1969   emit_int8((unsigned char)0xFF);
1970   emit_operand(rax, dst);
1971 }
1972 
1973 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1974   InstructionMark im(this);
1975   assert((0 <= cc) && (cc < 16), "illegal cc");
1976   if (L.is_bound()) {
1977     address dst = target(L);
1978     assert(dst != NULL, "jcc most probably wrong");
1979 
1980     const int short_size = 2;
1981     const int long_size = 6;
1982     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1983     if (maybe_short && is8bit(offs - short_size)) {
1984       // 0111 tttn #8-bit disp
1985       emit_int8(0x70 | cc);
1986       emit_int8((offs - short_size) & 0xFF);
1987     } else {
1988       // 0000 1111 1000 tttn #32-bit disp
1989       assert(is_simm32(offs - long_size),
1990              "must be 32bit offset (call4)");
1991       emit_int8(0x0F);
1992       emit_int8((unsigned char)(0x80 | cc));
1993       emit_int32(offs - long_size);
1994     }
1995   } else {
    // Note: we could eliminate conditional jumps to this jump if the
    //       condition is the same; however, that seems a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    //       an 8-bit displacement.
2000     L.add_patch_at(code(), locator());
2001     emit_int8(0x0F);
2002     emit_int8((unsigned char)(0x80 | cc));
2003     emit_int32(0);
2004   }
2005 }
2006 
2007 void Assembler::jccb(Condition cc, Label& L) {
2008   if (L.is_bound()) {
2009     const int short_size = 2;
2010     address entry = target(L);
2011 #ifdef ASSERT
2012     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2013     intptr_t delta = short_branch_delta();
2014     if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
2018 #endif
2019     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2020     // 0111 tttn #8-bit disp
2021     emit_int8(0x70 | cc);
2022     emit_int8((offs - short_size) & 0xFF);
2023   } else {
2024     InstructionMark im(this);
2025     L.add_patch_at(code(), locator());
2026     emit_int8(0x70 | cc);
2027     emit_int8(0);
2028   }
2029 }
2030 
2031 void Assembler::jmp(Address adr) {
2032   InstructionMark im(this);
2033   prefix(adr);
2034   emit_int8((unsigned char)0xFF);
2035   emit_operand(rsp, adr);
2036 }
2037 
2038 void Assembler::jmp(Label& L, bool maybe_short) {
2039   if (L.is_bound()) {
2040     address entry = target(L);
2041     assert(entry != NULL, "jmp most probably wrong");
2042     InstructionMark im(this);
2043     const int short_size = 2;
2044     const int long_size = 5;
2045     intptr_t offs = entry - pc();
2046     if (maybe_short && is8bit(offs - short_size)) {
2047       emit_int8((unsigned char)0xEB);
2048       emit_int8((offs - short_size) & 0xFF);
2049     } else {
2050       emit_int8((unsigned char)0xE9);
2051       emit_int32(offs - long_size);
2052     }
2053   } else {
2054     // By default, forward jumps are always 32-bit displacements, since
2055     // we can't yet know where the label will be bound.  If you're sure that
2056     // the forward jump will not run beyond 256 bytes, use jmpb to
2057     // force an 8-bit displacement.
2058     InstructionMark im(this);
2059     L.add_patch_at(code(), locator());
2060     emit_int8((unsigned char)0xE9);
2061     emit_int32(0);
2062   }
2063 }
2064 
2065 void Assembler::jmp(Register entry) {
2066   int encode = prefix_and_encode(entry->encoding());
2067   emit_int8((unsigned char)0xFF);
2068   emit_int8((unsigned char)(0xE0 | encode));
2069 }
2070 
2071 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2072   InstructionMark im(this);
2073   emit_int8((unsigned char)0xE9);
2074   assert(dest != NULL, "must have a target");
2075   intptr_t disp = dest - (pc() + sizeof(int32_t));
2076   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2077   emit_data(disp, rspec.reloc(), call32_operand);
2078 }
2079 
2080 void Assembler::jmpb(Label& L) {
2081   if (L.is_bound()) {
2082     const int short_size = 2;
2083     address entry = target(L);
2084     assert(entry != NULL, "jmp most probably wrong");
2085 #ifdef ASSERT
2086     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2087     intptr_t delta = short_branch_delta();
2088     if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
2092 #endif
2093     intptr_t offs = entry - pc();
2094     emit_int8((unsigned char)0xEB);
2095     emit_int8((offs - short_size) & 0xFF);
2096   } else {
2097     InstructionMark im(this);
2098     L.add_patch_at(code(), locator());
2099     emit_int8((unsigned char)0xEB);
2100     emit_int8(0);
2101   }
2102 }
2103 
2104 void Assembler::ldmxcsr( Address src) {
2105   if (UseAVX > 0 ) {
2106     InstructionMark im(this);
2107     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2108     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2109     emit_int8((unsigned char)0xAE);
2110     emit_operand(as_Register(2), src);
2111   } else {
2112     NOT_LP64(assert(VM_Version::supports_sse(), ""));
2113     InstructionMark im(this);
2114     prefix(src);
2115     emit_int8(0x0F);
2116     emit_int8((unsigned char)0xAE);
2117     emit_operand(as_Register(2), src);
2118   }
2119 }
2120 
2121 void Assembler::leal(Register dst, Address src) {
2122   InstructionMark im(this);
2123 #ifdef _LP64
2124   emit_int8(0x67); // addr32
2125   prefix(src, dst);
2126 #endif // LP64
2127   emit_int8((unsigned char)0x8D);
2128   emit_operand(dst, src);
2129 }
2130 
2131 void Assembler::lfence() {
2132   emit_int8(0x0F);
2133   emit_int8((unsigned char)0xAE);
2134   emit_int8((unsigned char)0xE8);
2135 }
2136 
2137 void Assembler::lock() {
2138   emit_int8((unsigned char)0xF0);
2139 }
2140 
2141 void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "lzcnt not supported: encoding would decode as BSR");
2143   emit_int8((unsigned char)0xF3);
2144   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2145   emit_int8(0x0F);
2146   emit_int8((unsigned char)0xBD);
2147   emit_int8((unsigned char)(0xC0 | encode));
2148 }
2149 
2150 // Emit mfence instruction
2151 void Assembler::mfence() {
2152   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2153   emit_int8(0x0F);
2154   emit_int8((unsigned char)0xAE);
2155   emit_int8((unsigned char)0xF0);
2156 }
2157 
2158 void Assembler::mov(Register dst, Register src) {
2159   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2160 }
2161 
2162 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2163   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2164   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2165   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2166   attributes.set_rex_vex_w_reverted();
2167   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2168   emit_int8(0x28);
2169   emit_int8((unsigned char)(0xC0 | encode));
2170 }
2171 
2172 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2173   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2174   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2175   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2176   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2177   emit_int8(0x28);
2178   emit_int8((unsigned char)(0xC0 | encode));
2179 }
2180 
2181 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2182   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2183   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2184   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2185   emit_int8(0x16);
2186   emit_int8((unsigned char)(0xC0 | encode));
2187 }
2188 
2189 void Assembler::movb(Register dst, Address src) {
2190   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2191   InstructionMark im(this);
2192   prefix(src, dst, true);
2193   emit_int8((unsigned char)0x8A);
2194   emit_operand(dst, src);
2195 }
2196 
2197 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2198   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2199   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2200   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2201   attributes.set_rex_vex_w_reverted();
2202   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2203   emit_int8(0x12);
  emit_int8((unsigned char)(0xC0 | encode));
2205 }
2206 
2207 void Assembler::kmovbl(KRegister dst, Register src) {
2208   assert(VM_Version::supports_avx512dq(), "");
2209   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2210   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2211   emit_int8((unsigned char)0x92);
2212   emit_int8((unsigned char)(0xC0 | encode));
2213 }
2214 
2215 void Assembler::kmovbl(Register dst, KRegister src) {
2216   assert(VM_Version::supports_avx512dq(), "");
2217   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2218   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2219   emit_int8((unsigned char)0x93);
2220   emit_int8((unsigned char)(0xC0 | encode));
2221 }
2222 
2223 void Assembler::kmovwl(KRegister dst, Register src) {
2224   assert(VM_Version::supports_evex(), "");
2225   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2226   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2227   emit_int8((unsigned char)0x92);
2228   emit_int8((unsigned char)(0xC0 | encode));
2229 }
2230 
2231 void Assembler::kmovwl(Register dst, KRegister src) {
2232   assert(VM_Version::supports_evex(), "");
2233   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2234   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2235   emit_int8((unsigned char)0x93);
2236   emit_int8((unsigned char)(0xC0 | encode));
2237 }
2238 
2239 void Assembler::kmovwl(KRegister dst, Address src) {
2240   assert(VM_Version::supports_evex(), "");
2241   InstructionMark im(this);
2242   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2243   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2244   emit_int8((unsigned char)0x90);
2245   emit_operand((Register)dst, src);
2246 }
2247 
2248 void Assembler::kmovdl(KRegister dst, Register src) {
2249   assert(VM_Version::supports_avx512bw(), "");
2250   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2251   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2252   emit_int8((unsigned char)0x92);
2253   emit_int8((unsigned char)(0xC0 | encode));
2254 }
2255 
2256 void Assembler::kmovdl(Register dst, KRegister src) {
2257   assert(VM_Version::supports_avx512bw(), "");
2258   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2259   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2260   emit_int8((unsigned char)0x93);
2261   emit_int8((unsigned char)(0xC0 | encode));
2262 }
2263 
2264 void Assembler::kmovql(KRegister dst, KRegister src) {
2265   assert(VM_Version::supports_avx512bw(), "");
2266   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2267   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2268   emit_int8((unsigned char)0x90);
2269   emit_int8((unsigned char)(0xC0 | encode));
2270 }
2271 
2272 void Assembler::kmovql(KRegister dst, Address src) {
2273   assert(VM_Version::supports_avx512bw(), "");
2274   InstructionMark im(this);
2275   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2276   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2277   emit_int8((unsigned char)0x90);
2278   emit_operand((Register)dst, src);
2279 }
2280 
2281 void Assembler::kmovql(Address dst, KRegister src) {
2282   assert(VM_Version::supports_avx512bw(), "");
2283   InstructionMark im(this);
2284   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2285   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2286   emit_int8((unsigned char)0x90);
2287   emit_operand((Register)src, dst);
2288 }
2289 
2290 void Assembler::kmovql(KRegister dst, Register src) {
2291   assert(VM_Version::supports_avx512bw(), "");
2292   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2293   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2294   emit_int8((unsigned char)0x92);
2295   emit_int8((unsigned char)(0xC0 | encode));
2296 }
2297 
2298 void Assembler::kmovql(Register dst, KRegister src) {
2299   assert(VM_Version::supports_avx512bw(), "");
2300   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2301   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2302   emit_int8((unsigned char)0x93);
2303   emit_int8((unsigned char)(0xC0 | encode));
2304 }
2305 
2306 void Assembler::knotwl(KRegister dst, KRegister src) {
2307   assert(VM_Version::supports_evex(), "");
2308   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2309   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2310   emit_int8((unsigned char)0x44);
2311   emit_int8((unsigned char)(0xC0 | encode));
2312 }
2313 
// Sets ZF if the OR of the two masks is all zeroes, and CF if it is all ones
2315 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2316   assert(VM_Version::supports_avx512dq(), "");
2317   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2318   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2319   emit_int8((unsigned char)0x98);
2320   emit_int8((unsigned char)(0xC0 | encode));
2321 }
2322 
// Sets ZF if the OR of the two masks is all zeroes, and CF if it is all ones
2324 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2325   assert(VM_Version::supports_evex(), "");
2326   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2327   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2328   emit_int8((unsigned char)0x98);
2329   emit_int8((unsigned char)(0xC0 | encode));
2330 }
2331 
// Sets ZF if the OR of the two masks is all zeroes, and CF if it is all ones
2333 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2334   assert(VM_Version::supports_avx512bw(), "");
2335   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2336   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2337   emit_int8((unsigned char)0x98);
2338   emit_int8((unsigned char)(0xC0 | encode));
2339 }
2340 
// Sets ZF if the OR of the two masks is all zeroes, and CF if it is all ones
2342 void Assembler::kortestql(KRegister src1, KRegister src2) {
2343   assert(VM_Version::supports_avx512bw(), "");
2344   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2345   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2346   emit_int8((unsigned char)0x98);
2347   emit_int8((unsigned char)(0xC0 | encode));
2348 }
2349 
// Sets ZF if the AND of the two masks is all zeroes (CF comes from an and-not test)
2351 void Assembler::ktestql(KRegister src1, KRegister src2) {
2352   assert(VM_Version::supports_avx512bw(), "");
2353   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2354   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2355   emit_int8((unsigned char)0x99);
2356   emit_int8((unsigned char)(0xC0 | encode));
2357 }
2358 
2359 void Assembler::ktestq(KRegister src1, KRegister src2) {
2360   assert(VM_Version::supports_avx512bw(), "");
2361   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2362   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2363   emit_int8((unsigned char)0x99);
2364   emit_int8((unsigned char)(0xC0 | encode));
2365 }
2366 
2367 void Assembler::ktestd(KRegister src1, KRegister src2) {
2368   assert(VM_Version::supports_avx512bw(), "");
2369   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2370   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2371   emit_int8((unsigned char)0x99);
2372   emit_int8((unsigned char)(0xC0 | encode));
2373 }
2374 
2375 void Assembler::movb(Address dst, int imm8) {
2376   InstructionMark im(this);
  prefix(dst);
2378   emit_int8((unsigned char)0xC6);
2379   emit_operand(rax, dst, 1);
2380   emit_int8(imm8);
2381 }
2382 
2383 
2384 void Assembler::movb(Address dst, Register src) {
2385   assert(src->has_byte_register(), "must have byte register");
2386   InstructionMark im(this);
2387   prefix(dst, src, true);
2388   emit_int8((unsigned char)0x88);
2389   emit_operand(src, dst);
2390 }
2391 
2392 void Assembler::movdl(XMMRegister dst, Register src) {
2393   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2394   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2395   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2396   emit_int8(0x6E);
2397   emit_int8((unsigned char)(0xC0 | encode));
2398 }
2399 
2400 void Assembler::movdl(Register dst, XMMRegister src) {
2401   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2402   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2403   // swap src/dst to get correct prefix
2404   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2405   emit_int8(0x7E);
2406   emit_int8((unsigned char)(0xC0 | encode));
2407 }
2408 
2409 void Assembler::movdl(XMMRegister dst, Address src) {
2410   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2411   InstructionMark im(this);
2412   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2413   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2414   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2415   emit_int8(0x6E);
2416   emit_operand(dst, src);
2417 }
2418 
2419 void Assembler::movdl(Address dst, XMMRegister src) {
2420   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2421   InstructionMark im(this);
2422   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2423   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2424   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2425   emit_int8(0x7E);
2426   emit_operand(src, dst);
2427 }
2428 
2429 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2430   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2431   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2432   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2433   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2434   emit_int8(0x6F);
2435   emit_int8((unsigned char)(0xC0 | encode));
2436 }
2437 
2438 void Assembler::movdqa(XMMRegister dst, Address src) {
2439   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2440   InstructionMark im(this);
2441   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2442   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2443   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2444   emit_int8(0x6F);
2445   emit_operand(dst, src);
2446 }
2447 
2448 void Assembler::movdqu(XMMRegister dst, Address src) {
2449   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2450   InstructionMark im(this);
2451   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2452   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2453   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2454   emit_int8(0x6F);
2455   emit_operand(dst, src);
2456 }
2457 
2458 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2459   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2460   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2461   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2462   emit_int8(0x6F);
2463   emit_int8((unsigned char)(0xC0 | encode));
2464 }
2465 
2466 void Assembler::movdqu(Address dst, XMMRegister src) {
2467   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2468   InstructionMark im(this);
2469   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2470   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2471   attributes.reset_is_clear_context();
2472   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2473   emit_int8(0x7F);
2474   emit_operand(src, dst);
2475 }
2476 
// Move Unaligned 256-bit Vector
2478 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2479   assert(UseAVX > 0, "");
2480   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2481   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2482   emit_int8(0x6F);
2483   emit_int8((unsigned char)(0xC0 | encode));
2484 }
2485 
2486 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2487   assert(UseAVX > 0, "");
2488   InstructionMark im(this);
2489   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2490   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2491   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2492   emit_int8(0x6F);
2493   emit_operand(dst, src);
2494 }
2495 
2496 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2497   assert(UseAVX > 0, "");
2498   InstructionMark im(this);
2499   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2500   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2501   attributes.reset_is_clear_context();
2502   // swap src<->dst for encoding
2503   assert(src != xnoreg, "sanity");
2504   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2505   emit_int8(0x7F);
2506   emit_operand(src, dst);
2507 }
2508 
// Move Unaligned EVEX-enabled Vector (element size programmable via the b/w/l/q suffix: 8, 16, 32, or 64 bits)
2510 void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
2511   assert(VM_Version::supports_evex(), "");
2512   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2513   attributes.set_is_evex_instruction();
2514   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2515   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2516   emit_int8(0x6F);
2517   emit_int8((unsigned char)(0xC0 | encode));
2518 }
2519 
2520 void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
2521   assert(VM_Version::supports_evex(), "");
2522   InstructionMark im(this);
2523   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2524   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2525   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2526   attributes.set_is_evex_instruction();
2527   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2528   emit_int8(0x6F);
2529   emit_operand(dst, src);
2530 }
2531 
2532 void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
2533   assert(VM_Version::supports_evex(), "");
2534   assert(src != xnoreg, "sanity");
2535   InstructionMark im(this);
2536   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2537   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2538   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2539   attributes.set_is_evex_instruction();
2540   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2541   emit_int8(0x7F);
2542   emit_operand(src, dst);
2543 }
2544 
2545 void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2546   assert(VM_Version::supports_avx512vlbw(), "");
2547   assert(is_vector_masking(), "");    // For stub code use only
2548   InstructionMark im(this);
2549   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2550   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2551   attributes.set_embedded_opmask_register_specifier(mask);
2552   attributes.set_is_evex_instruction();
2553   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2554   emit_int8(0x6F);
2555   emit_operand(dst, src);
2556 }
2557 
2558 void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
2559   assert(VM_Version::supports_evex(), "");
2560   InstructionMark im(this);
2561   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2562   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2563   attributes.set_is_evex_instruction();
2564   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2565   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2566   emit_int8(0x6F);
2567   emit_operand(dst, src);
2568 }
2569 
2570 void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2571   assert(is_vector_masking(), "");
2572   assert(VM_Version::supports_avx512vlbw(), "");
2573   InstructionMark im(this);
2574   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2575   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2576   attributes.set_embedded_opmask_register_specifier(mask);
2577   attributes.set_is_evex_instruction();
2578   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2579   emit_int8(0x6F);
2580   emit_operand(dst, src);
2581 }
2582 
2583 void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
2584   assert(VM_Version::supports_evex(), "");
2585   assert(src != xnoreg, "sanity");
2586   InstructionMark im(this);
2587   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2588   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2589   attributes.set_is_evex_instruction();
2590   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2591   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2592   emit_int8(0x7F);
2593   emit_operand(src, dst);
2594 }
2595 
2596 void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
2597   assert(VM_Version::supports_avx512vlbw(), "");
2598   assert(src != xnoreg, "sanity");
2599   InstructionMark im(this);
2600   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2601   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2602   attributes.reset_is_clear_context();
2603   attributes.set_embedded_opmask_register_specifier(mask);
2604   attributes.set_is_evex_instruction();
2605   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2606   emit_int8(0x7F);
2607   emit_operand(src, dst);
2608 }
2609 
2610 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2611   assert(VM_Version::supports_evex(), "");
2612   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2613   attributes.set_is_evex_instruction();
2614   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2615   emit_int8(0x6F);
2616   emit_int8((unsigned char)(0xC0 | encode));
2617 }
2618 
2619 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2620   assert(VM_Version::supports_evex(), "");
2621   InstructionMark im(this);
2622   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
2623   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2624   attributes.set_is_evex_instruction();
2625   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2626   emit_int8(0x6F);
2627   emit_operand(dst, src);
2628 }
2629 
2630 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2631   assert(VM_Version::supports_evex(), "");
2632   assert(src != xnoreg, "sanity");
2633   InstructionMark im(this);
2634   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2635   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2636   attributes.reset_is_clear_context();
2637   attributes.set_is_evex_instruction();
2638   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2639   emit_int8(0x7F);
2640   emit_operand(src, dst);
2641 }
2642 
2643 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2644   assert(VM_Version::supports_evex(), "");
2645   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2646   attributes.set_is_evex_instruction();
2647   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2648   emit_int8(0x6F);
2649   emit_int8((unsigned char)(0xC0 | encode));
2650 }
2651 
2652 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2653   assert(VM_Version::supports_evex(), "");
2654   InstructionMark im(this);
2655   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2656   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2657   attributes.set_is_evex_instruction();
2658   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2659   emit_int8(0x6F);
2660   emit_operand(dst, src);
2661 }
2662 
2663 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2664   assert(VM_Version::supports_evex(), "");
2665   assert(src != xnoreg, "sanity");
2666   InstructionMark im(this);
2667   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2668   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2669   attributes.reset_is_clear_context();
2670   attributes.set_is_evex_instruction();
2671   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2672   emit_int8(0x7F);
2673   emit_operand(src, dst);
2674 }
2675 
// Uses zero extension on 64-bit (writing a 32-bit register clears the upper 32 bits)
2677 
2678 void Assembler::movl(Register dst, int32_t imm32) {
2679   int encode = prefix_and_encode(dst->encoding());
2680   emit_int8((unsigned char)(0xB8 | encode));
2681   emit_int32(imm32);
2682 }
2683 
2684 void Assembler::movl(Register dst, Register src) {
2685   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2686   emit_int8((unsigned char)0x8B);
2687   emit_int8((unsigned char)(0xC0 | encode));
2688 }
2689 
2690 void Assembler::movl(Register dst, Address src) {
2691   InstructionMark im(this);
2692   prefix(src, dst);
2693   emit_int8((unsigned char)0x8B);
2694   emit_operand(dst, src);
2695 }
2696 
2697 void Assembler::movl(Address dst, int32_t imm32) {
2698   InstructionMark im(this);
2699   prefix(dst);
2700   emit_int8((unsigned char)0xC7);
2701   emit_operand(rax, dst, 4);
2702   emit_int32(imm32);
2703 }
2704 
2705 void Assembler::movl(Address dst, Register src) {
2706   InstructionMark im(this);
2707   prefix(dst, src);
2708   emit_int8((unsigned char)0x89);
2709   emit_operand(src, dst);
2710 }
2711 
// Newer CPUs require the use of movsd and movss to avoid partial register
// stalls when loading from memory. But for old Opteron, use movlpd instead
// of movsd. The selection is done in MacroAssembler::movdbl() and movflt().
2715 void Assembler::movlpd(XMMRegister dst, Address src) {
2716   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2717   InstructionMark im(this);
2718   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2719   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2720   attributes.set_rex_vex_w_reverted();
2721   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2722   emit_int8(0x12);
2723   emit_operand(dst, src);
2724 }
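
// A minimal sketch of the shape of that selection (the real code lives in
// MacroAssembler::movdbl()/movflt() in macroAssembler_x86.cpp and is steered
// by the UseXmmLoadAndClearUpper flag):
//
//   void MacroAssembler::movdbl(XMMRegister dst, Address src) {
//     if (UseXmmLoadAndClearUpper) { movsd (dst, src); }  // newer CPUs
//     else                         { movlpd(dst, src); }  // old Opteron
//   }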
2725 
2726 void Assembler::movq( MMXRegister dst, Address src ) {
2727   assert( VM_Version::supports_mmx(), "" );
2728   emit_int8(0x0F);
2729   emit_int8(0x6F);
2730   emit_operand(dst, src);
2731 }
2732 
2733 void Assembler::movq( Address dst, MMXRegister src ) {
2734   assert( VM_Version::supports_mmx(), "" );
2735   emit_int8(0x0F);
2736   emit_int8(0x7F);
2737   // workaround for gcc (3.2.1-7a) bug
2738   // In that version of gcc, with only an emit_operand(MMX, Address)
2739   // available, gcc will tail jump and try to reverse the parameters,
2740   // completely obliterating dst in the process. By having a version
2741   // available that doesn't need to swap the args at the tail jump, the
2742   // bug is avoided.
2743   emit_operand(dst, src);
2744 }
2745 
2746 void Assembler::movq(XMMRegister dst, Address src) {
2747   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2748   InstructionMark im(this);
2749   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2750   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2751   attributes.set_rex_vex_w_reverted();
2752   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2753   emit_int8(0x7E);
2754   emit_operand(dst, src);
2755 }
2756 
2757 void Assembler::movq(Address dst, XMMRegister src) {
2758   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2759   InstructionMark im(this);
2760   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2761   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2762   attributes.set_rex_vex_w_reverted();
2763   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2764   emit_int8((unsigned char)0xD6);
2765   emit_operand(src, dst);
2766 }
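
// For illustration: the F3-prefixed load above (movq xmm, m64) fills the low
// quadword of dst and zeroes bits 127:64, while the 66-prefixed store
// (movq m64, xmm) writes only the low quadword of src to memory.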
2767 
2768 void Assembler::movsbl(Register dst, Address src) { // movsxb
2769   InstructionMark im(this);
2770   prefix(src, dst);
2771   emit_int8(0x0F);
2772   emit_int8((unsigned char)0xBE);
2773   emit_operand(dst, src);
2774 }
2775 
2776 void Assembler::movsbl(Register dst, Register src) { // movsxb
2777   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2778   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2779   emit_int8(0x0F);
2780   emit_int8((unsigned char)0xBE);
2781   emit_int8((unsigned char)(0xC0 | encode));
2782 }
2783 
2784 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2785   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2786   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2787   attributes.set_rex_vex_w_reverted();
2788   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2789   emit_int8(0x10);
2790   emit_int8((unsigned char)(0xC0 | encode));
2791 }
2792 
2793 void Assembler::movsd(XMMRegister dst, Address src) {
2794   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2795   InstructionMark im(this);
2796   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2797   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2798   attributes.set_rex_vex_w_reverted();
2799   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2800   emit_int8(0x10);
2801   emit_operand(dst, src);
2802 }
2803 
2804 void Assembler::movsd(Address dst, XMMRegister src) {
2805   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2806   InstructionMark im(this);
2807   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2808   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2809   attributes.reset_is_clear_context();
2810   attributes.set_rex_vex_w_reverted();
2811   simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2812   emit_int8(0x11);
2813   emit_operand(src, dst);
2814 }
2815 
2816 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2817   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2818   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2819   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2820   emit_int8(0x10);
2821   emit_int8((unsigned char)(0xC0 | encode));
2822 }
2823 
2824 void Assembler::movss(XMMRegister dst, Address src) {
2825   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2826   InstructionMark im(this);
2827   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2828   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2829   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2830   emit_int8(0x10);
2831   emit_operand(dst, src);
2832 }
2833 
2834 void Assembler::movss(Address dst, XMMRegister src) {
2835   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2836   InstructionMark im(this);
2837   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2838   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2839   attributes.reset_is_clear_context();
2840   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2841   emit_int8(0x11);
2842   emit_operand(src, dst);
2843 }
2844 
2845 void Assembler::movswl(Register dst, Address src) { // movsxw
2846   InstructionMark im(this);
2847   prefix(src, dst);
2848   emit_int8(0x0F);
2849   emit_int8((unsigned char)0xBF);
2850   emit_operand(dst, src);
2851 }
2852 
2853 void Assembler::movswl(Register dst, Register src) { // movsxw
2854   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2855   emit_int8(0x0F);
2856   emit_int8((unsigned char)0xBF);
2857   emit_int8((unsigned char)(0xC0 | encode));
2858 }
2859 
2860 void Assembler::movw(Address dst, int imm16) {
2861   InstructionMark im(this);
2862 
2863   emit_int8(0x66); // switch to 16-bit mode
2864   prefix(dst);
2865   emit_int8((unsigned char)0xC7);
2866   emit_operand(rax, dst, 2);
2867   emit_int16(imm16);
2868 }
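
// For illustration (assuming a simple [rax] address with zero displacement):
// the 0x66 operand-size prefix turns the C7 move into a 16-bit store, so
// movw(Address(rax, 0), 0x1234) emits 66 C7 00 34 12.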
2869 
2870 void Assembler::movw(Register dst, Address src) {
2871   InstructionMark im(this);
2872   emit_int8(0x66);
2873   prefix(src, dst);
2874   emit_int8((unsigned char)0x8B);
2875   emit_operand(dst, src);
2876 }
2877 
2878 void Assembler::movw(Address dst, Register src) {
2879   InstructionMark im(this);
2880   emit_int8(0x66);
2881   prefix(dst, src);
2882   emit_int8((unsigned char)0x89);
2883   emit_operand(src, dst);
2884 }
2885 
2886 void Assembler::movzbl(Register dst, Address src) { // movzxb
2887   InstructionMark im(this);
2888   prefix(src, dst);
2889   emit_int8(0x0F);
2890   emit_int8((unsigned char)0xB6);
2891   emit_operand(dst, src);
2892 }
2893 
2894 void Assembler::movzbl(Register dst, Register src) { // movzxb
2895   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2896   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2897   emit_int8(0x0F);
2898   emit_int8((unsigned char)0xB6);
2899   emit_int8((unsigned char)(0xC0 | encode));
2900 }
2901 
2902 void Assembler::movzwl(Register dst, Address src) { // movzxw
2903   InstructionMark im(this);
2904   prefix(src, dst);
2905   emit_int8(0x0F);
2906   emit_int8((unsigned char)0xB7);
2907   emit_operand(dst, src);
2908 }
2909 
2910 void Assembler::movzwl(Register dst, Register src) { // movzxw
2911   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2912   emit_int8(0x0F);
2913   emit_int8((unsigned char)0xB7);
2914   emit_int8((unsigned char)(0xC0 | encode));
2915 }
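
// For illustration (32-bit register operands with encodings below 8, so no
// REX prefix is needed):
//   movzbl(rcx, rdx) emits 0F B6 CA   (movzx ecx, dl)
//   movswl(rcx, rdx) emits 0F BF CA   (movsx ecx, dx)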
2916 
2917 void Assembler::mull(Address src) {
2918   InstructionMark im(this);
2919   prefix(src);
2920   emit_int8((unsigned char)0xF7);
2921   emit_operand(rsp, src);
2922 }
2923 
2924 void Assembler::mull(Register src) {
2925   int encode = prefix_and_encode(src->encoding());
2926   emit_int8((unsigned char)0xF7);
2927   emit_int8((unsigned char)(0xE0 | encode));
2928 }
2929 
2930 void Assembler::mulsd(XMMRegister dst, Address src) {
2931   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2932   InstructionMark im(this);
2933   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2934   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2935   attributes.set_rex_vex_w_reverted();
2936   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2937   emit_int8(0x59);
2938   emit_operand(dst, src);
2939 }
2940 
2941 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2942   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2943   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2944   attributes.set_rex_vex_w_reverted();
2945   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2946   emit_int8(0x59);
2947   emit_int8((unsigned char)(0xC0 | encode));
2948 }
2949 
2950 void Assembler::mulss(XMMRegister dst, Address src) {
2951   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2952   InstructionMark im(this);
2953   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2954   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2955   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2956   emit_int8(0x59);
2957   emit_operand(dst, src);
2958 }
2959 
2960 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2961   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2962   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2963   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2964   emit_int8(0x59);
2965   emit_int8((unsigned char)(0xC0 | encode));
2966 }
2967 
2968 void Assembler::negl(Register dst) {
2969   int encode = prefix_and_encode(dst->encoding());
2970   emit_int8((unsigned char)0xF7);
2971   emit_int8((unsigned char)(0xD8 | encode));
2972 }
2973 
2974 void Assembler::nop(int i) {
2975 #ifdef ASSERT
2976   assert(i > 0, " ");
2977   // The fancy nops aren't currently recognized by debuggers, making it a
2978   // pain to disassemble code while debugging. If asserts are on, speed is
2979   // clearly not an issue, so simply use the traditional single-byte nop
2980   // for alignment.
2981 
2982   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2983   return;
2984 
2985 #endif // ASSERT
2986 
2987   if (UseAddressNop && VM_Version::is_intel()) {
2988     //
2989     // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2990     //  1: 0x90
2991     //  2: 0x66 0x90
2992     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2993     //  4: 0x0F 0x1F 0x40 0x00
2994     //  5: 0x0F 0x1F 0x44 0x00 0x00
2995     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2996     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2997     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2998     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2999     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3000     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3001 
3002     // The remaining encodings are Intel-specific - don't use consecutive address nops
3003 
3004     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3005     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3006     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3007     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3008 
3009     while (i >= 15) {
3010       // For Intel, don't generate consecutive address nops (mix with regular nops)
3011       i -= 15;
3012       emit_int8(0x66);   // size prefix
3013       emit_int8(0x66);   // size prefix
3014       emit_int8(0x66);   // size prefix
3015       addr_nop_8();
3016       emit_int8(0x66);   // size prefix
3017       emit_int8(0x66);   // size prefix
3018       emit_int8(0x66);   // size prefix
3019       emit_int8((unsigned char)0x90);
3020                          // nop
3021     }
3022     switch (i) {
3023       case 14:
3024         emit_int8(0x66); // size prefix
3025       case 13:
3026         emit_int8(0x66); // size prefix
3027       case 12:
3028         addr_nop_8();
3029         emit_int8(0x66); // size prefix
3030         emit_int8(0x66); // size prefix
3031         emit_int8(0x66); // size prefix
3032         emit_int8((unsigned char)0x90);
3033                          // nop
3034         break;
3035       case 11:
3036         emit_int8(0x66); // size prefix
3037       case 10:
3038         emit_int8(0x66); // size prefix
3039       case 9:
3040         emit_int8(0x66); // size prefix
3041       case 8:
3042         addr_nop_8();
3043         break;
3044       case 7:
3045         addr_nop_7();
3046         break;
3047       case 6:
3048         emit_int8(0x66); // size prefix
3049       case 5:
3050         addr_nop_5();
3051         break;
3052       case 4:
3053         addr_nop_4();
3054         break;
3055       case 3:
3056         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3057         emit_int8(0x66); // size prefix
3058       case 2:
3059         emit_int8(0x66); // size prefix
3060       case 1:
3061         emit_int8((unsigned char)0x90);
3062                          // nop
3063         break;
3064       default:
3065         assert(i == 0, " ");
3066     }
3067     return;
3068   }
3069   if (UseAddressNop && VM_Version::is_amd()) {
3070     //
3071     // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
3072     //  1: 0x90
3073     //  2: 0x66 0x90
3074     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3075     //  4: 0x0F 0x1F 0x40 0x00
3076     //  5: 0x0F 0x1F 0x44 0x00 0x00
3077     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3078     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3079     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3080     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3081     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3082     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3083 
3084     // The remaining encodings are AMD-specific - use consecutive address nops
3085 
3086     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3087     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3088     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3089     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3090     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3091     //     Size prefixes (0x66) are added for larger sizes
3092 
3093     while (i >= 22) {
3094       i -= 11;
3095       emit_int8(0x66); // size prefix
3096       emit_int8(0x66); // size prefix
3097       emit_int8(0x66); // size prefix
3098       addr_nop_8();
3099     }
3100     // Generate the first nop for sizes from 21 down to 12
3101     switch (i) {
3102       case 21:
3103         i -= 1;
3104         emit_int8(0x66); // size prefix
3105       case 20:
3106       case 19:
3107         i -= 1;
3108         emit_int8(0x66); // size prefix
3109       case 18:
3110       case 17:
3111         i -= 1;
3112         emit_int8(0x66); // size prefix
3113       case 16:
3114       case 15:
3115         i -= 8;
3116         addr_nop_8();
3117         break;
3118       case 14:
3119       case 13:
3120         i -= 7;
3121         addr_nop_7();
3122         break;
3123       case 12:
3124         i -= 6;
3125         emit_int8(0x66); // size prefix
3126         addr_nop_5();
3127         break;
3128       default:
3129         assert(i < 12, " ");
3130     }
3131 
3132     // Generate the second nop for sizes from 11 down to 1
3133     switch (i) {
3134       case 11:
3135         emit_int8(0x66); // size prefix
3136       case 10:
3137         emit_int8(0x66); // size prefix
3138       case 9:
3139         emit_int8(0x66); // size prefix
3140       case 8:
3141         addr_nop_8();
3142         break;
3143       case 7:
3144         addr_nop_7();
3145         break;
3146       case 6:
3147         emit_int8(0x66); // size prefix
3148       case 5:
3149         addr_nop_5();
3150         break;
3151       case 4:
3152         addr_nop_4();
3153         break;
3154       case 3:
3155         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3156         emit_int8(0x66); // size prefix
3157       case 2:
3158         emit_int8(0x66); // size prefix
3159       case 1:
3160         emit_int8((unsigned char)0x90);
3161                          // nop
3162         break;
3163       default:
3164         assert(i == 0, " ");
3165     }
3166     return;
3167   }
3168 
3169   if (UseAddressNop && VM_Version::is_zx()) {
3170     //
3171     // Using multi-byte nops "0x0F 0x1F [address]" for ZX
3172     //  1: 0x90
3173     //  2: 0x66 0x90
3174     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3175     //  4: 0x0F 0x1F 0x40 0x00
3176     //  5: 0x0F 0x1F 0x44 0x00 0x00
3177     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3178     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3179     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3180     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3181     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3182     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3183 
3184     // The remaining encodings are ZX-specific - don't use consecutive address nops
3185 
3186     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3187     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3188     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3189     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3190 
3191     while (i >= 15) {
3192       // For ZX, don't generate consecutive address nops (mix with regular nops)
3193       i -= 15;
3194       emit_int8(0x66);   // size prefix
3195       emit_int8(0x66);   // size prefix
3196       emit_int8(0x66);   // size prefix
3197       addr_nop_8();
3198       emit_int8(0x66);   // size prefix
3199       emit_int8(0x66);   // size prefix
3200       emit_int8(0x66);   // size prefix
3201       emit_int8((unsigned char)0x90);
3202                          // nop
3203     }
3204     switch (i) {
3205       case 14:
3206         emit_int8(0x66); // size prefix
3207       case 13:
3208         emit_int8(0x66); // size prefix
3209       case 12:
3210         addr_nop_8();
3211         emit_int8(0x66); // size prefix
3212         emit_int8(0x66); // size prefix
3213         emit_int8(0x66); // size prefix
3214         emit_int8((unsigned char)0x90);
3215                          // nop
3216         break;
3217       case 11:
3218         emit_int8(0x66); // size prefix
3219       case 10:
3220         emit_int8(0x66); // size prefix
3221       case 9:
3222         emit_int8(0x66); // size prefix
3223       case 8:
3224         addr_nop_8();
3225         break;
3226       case 7:
3227         addr_nop_7();
3228         break;
3229       case 6:
3230         emit_int8(0x66); // size prefix
3231       case 5:
3232         addr_nop_5();
3233         break;
3234       case 4:
3235         addr_nop_4();
3236         break;
3237       case 3:
3238         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3239         emit_int8(0x66); // size prefix
3240       case 2:
3241         emit_int8(0x66); // size prefix
3242       case 1:
3243         emit_int8((unsigned char)0x90);
3244                          // nop
3245         break;
3246       default:
3247         assert(i == 0, " ");
3248     }
3249     return;
3250   }
3251 
3252   // Using nops with size prefixes "0x66 0x90".
3253   // From AMD Optimization Guide:
3254   //  1: 0x90
3255   //  2: 0x66 0x90
3256   //  3: 0x66 0x66 0x90
3257   //  4: 0x66 0x66 0x66 0x90
3258   //  5: 0x66 0x66 0x90 0x66 0x90
3259   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3260   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3261   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3262   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3263   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3264   //
3265   while (i > 12) {
3266     i -= 4;
3267     emit_int8(0x66); // size prefix
3268     emit_int8(0x66);
3269     emit_int8(0x66);
3270     emit_int8((unsigned char)0x90);
3271                      // nop
3272   }
3273   // 1 - 12 nops
3274   if (i > 8) {
3275     if (i > 9) {
3276       i -= 1;
3277       emit_int8(0x66);
3278     }
3279     i -= 3;
3280     emit_int8(0x66);
3281     emit_int8(0x66);
3282     emit_int8((unsigned char)0x90);
3283   }
3284   // 1 - 8 nops
3285   if (i > 4) {
3286     if (i > 6) {
3287       i -= 1;
3288       emit_int8(0x66);
3289     }
3290     i -= 3;
3291     emit_int8(0x66);
3292     emit_int8(0x66);
3293     emit_int8((unsigned char)0x90);
3294   }
3295   switch (i) {
3296     case 4:
3297       emit_int8(0x66);
3298     case 3:
3299       emit_int8(0x66);
3300     case 2:
3301       emit_int8(0x66);
3302     case 1:
3303       emit_int8((unsigned char)0x90);
3304       break;
3305     default:
3306       assert(i == 0, " ");
3307   }
3308 }
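
// For illustration: with UseAddressNop on an Intel CPU, nop(7) emits the
// single seven-byte form from the table above, 0F 1F 80 00 00 00 00, while
// the plain fallback path at the end emits 66 66 90 for nop(3).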
3309 
3310 void Assembler::notl(Register dst) {
3311   int encode = prefix_and_encode(dst->encoding());
3312   emit_int8((unsigned char)0xF7);
3313   emit_int8((unsigned char)(0xD0 | encode));
3314 }
3315 
3316 void Assembler::orl(Address dst, int32_t imm32) {
3317   InstructionMark im(this);
3318   prefix(dst);
3319   emit_arith_operand(0x81, rcx, dst, imm32);
3320 }
3321 
3322 void Assembler::orl(Register dst, int32_t imm32) {
3323   prefix(dst);
3324   emit_arith(0x81, 0xC8, dst, imm32);
3325 }
3326 
3327 void Assembler::orl(Register dst, Address src) {
3328   InstructionMark im(this);
3329   prefix(src, dst);
3330   emit_int8(0x0B);
3331   emit_operand(dst, src);
3332 }
3333 
3334 void Assembler::orl(Register dst, Register src) {
3335   (void) prefix_and_encode(dst->encoding(), src->encoding());
3336   emit_arith(0x0B, 0xC0, dst, src);
3337 }
3338 
3339 void Assembler::orl(Address dst, Register src) {
3340   InstructionMark im(this);
3341   prefix(dst, src);
3342   emit_int8(0x09);
3343   emit_operand(src, dst);
3344 }
3345 
3346 void Assembler::packuswb(XMMRegister dst, Address src) {
3347   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3348   assert((UseAVX > 0), "SSE mode requires 16-byte address alignment");
3349   InstructionMark im(this);
3350   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3351   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3352   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3353   emit_int8(0x67);
3354   emit_operand(dst, src);
3355 }
3356 
3357 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
3358   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3359   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3360   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3361   emit_int8(0x67);
3362   emit_int8((unsigned char)(0xC0 | encode));
3363 }
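
// For illustration: packuswb packs the eight signed words of dst and src into
// sixteen unsigned bytes with saturation, e.g. a word of 0x0123 (291) becomes
// 0xFF and a word of 0xFF80 (-128) becomes 0x00.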
3364 
3365 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3366   assert(UseAVX > 0, "some form of AVX must be enabled");
3367   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3368   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3369   emit_int8(0x67);
3370   emit_int8((unsigned char)(0xC0 | encode));
3371 }
3372 
3373 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3374   assert(VM_Version::supports_avx2(), "");
3375   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3376   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3377   emit_int8(0x00);
3378   emit_int8((unsigned char)(0xC0 | encode));
3379   emit_int8(imm8);
3380 }
3381 
3382 void Assembler::vperm2i128(XMMRegister dst,  XMMRegister nds, XMMRegister src, int imm8) {
3383   assert(VM_Version::supports_avx2(), "");
3384   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3385   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3386   emit_int8(0x46);
3387   emit_int8((unsigned char)(0xC0 | encode));
3388   emit_int8(imm8);
3389 }
3390 
3391 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3392   assert(VM_Version::supports_avx(), "");
3393   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3394   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3395   emit_int8(0x06);
3396   emit_int8((unsigned char)(0xC0 | encode));
3397   emit_int8(imm8);
3398 }
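
// A note on the imm8 used by vperm2i128/vperm2f128 above (per the Intel SDM):
// bits 1:0 select the source of dst[127:0] from {nds[127:0], nds[255:128],
// src[127:0], src[255:128]}, bits 5:4 do the same for dst[255:128], and bits
// 3 and 7 zero the corresponding lane instead. E.g. imm8 = 0x01 copies the
// high lane of nds into the low lane of dst.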
3399 
3400 
3401 void Assembler::pause() {
3402   emit_int8((unsigned char)0xF3);
3403   emit_int8((unsigned char)0x90);
3404 }
3405 
3406 void Assembler::ud2() {
3407   emit_int8(0x0F);
3408   emit_int8(0x0B);
3409 }
3410 
3411 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3412   assert(VM_Version::supports_sse4_2(), "");
3413   InstructionMark im(this);
3414   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3415   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3416   emit_int8(0x61);
3417   emit_operand(dst, src);
3418   emit_int8(imm8);
3419 }
3420 
3421 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3422   assert(VM_Version::supports_sse4_2(), "");
3423   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3424   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3425   emit_int8(0x61);
3426   emit_int8((unsigned char)(0xC0 | encode));
3427   emit_int8(imm8);
3428 }
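
// A note on the pcmpestri imm8 (per the Intel SDM): bits 1:0 select the
// element format (unsigned bytes/words, signed bytes/words), bits 3:2 the
// aggregation (equal-any, ranges, equal-each, equal-ordered), bits 5:4 the
// polarity, and bit 6 whether ECX receives the least- or most-significant
// matching index.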
3429 
3430 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3431 void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
3432   assert(VM_Version::supports_sse2(), "");
3433   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3434   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3435   emit_int8(0x74);
3436   emit_int8((unsigned char)(0xC0 | encode));
3437 }
3438 
3439 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3440 void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3441   assert(VM_Version::supports_avx(), "");
3442   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3443   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3444   emit_int8(0x74);
3445   emit_int8((unsigned char)(0xC0 | encode));
3446 }
3447 
3448 // In this context, kdst is written with the mask used to process the equal components
3449 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3450   assert(VM_Version::supports_avx512bw(), "");
3451   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3452   attributes.set_is_evex_instruction();
3453   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3454   emit_int8(0x74);
3455   emit_int8((unsigned char)(0xC0 | encode));
3456 }
3457 
3458 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3459   assert(VM_Version::supports_avx512vlbw(), "");
3460   InstructionMark im(this);
3461   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3462   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3463   attributes.set_is_evex_instruction();
3464   int dst_enc = kdst->encoding();
3465   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3466   emit_int8(0x64);
3467   emit_operand(as_Register(dst_enc), src);
3468 }
3469 
3470 void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3471   assert(is_vector_masking(), "");
3472   assert(VM_Version::supports_avx512vlbw(), "");
3473   InstructionMark im(this);
3474   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3475   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3476   attributes.reset_is_clear_context();
3477   attributes.set_embedded_opmask_register_specifier(mask);
3478   attributes.set_is_evex_instruction();
3479   int dst_enc = kdst->encoding();
3480   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3481   emit_int8(0x64);
3482   emit_operand(as_Register(dst_enc), src);
3483 }
3484 
3485 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3486   assert(VM_Version::supports_avx512vlbw(), "");
3487   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3488   attributes.set_is_evex_instruction();
3489   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3490   emit_int8(0x3E);
3491   emit_int8((unsigned char)(0xC0 | encode));
3492   emit_int8(vcc);
3493 }
3494 
3495 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3496   assert(is_vector_masking(), "");
3497   assert(VM_Version::supports_avx512vlbw(), "");
3498   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3499   attributes.reset_is_clear_context();
3500   attributes.set_embedded_opmask_register_specifier(mask);
3501   attributes.set_is_evex_instruction();
3502   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3503   emit_int8(0x3E);
3504   emit_int8((unsigned char)(0xC0 | encode));
3505   emit_int8(vcc);
3506 }
3507 
3508 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3509   assert(VM_Version::supports_avx512vlbw(), "");
3510   InstructionMark im(this);
3511   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3512   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3513   attributes.set_is_evex_instruction();
3514   int dst_enc = kdst->encoding();
3515   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3516   emit_int8(0x3E);
3517   emit_operand(as_Register(dst_enc), src);
3518   emit_int8(vcc);
3519 }
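
// A note on the ComparisonPredicate (vcc) byte used by evpcmpuw above
// (per the Intel SDM encoding for VPCMPUW): 0 = eq, 1 = lt, 2 = le,
// 3 = false, 4 = neq, 5 = nlt (ge), 6 = nle (gt), 7 = true.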
3520 
3521 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3522   assert(VM_Version::supports_avx512bw(), "");
3523   InstructionMark im(this);
3524   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3525   attributes.set_is_evex_instruction();
3526   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3527   int dst_enc = kdst->encoding();
3528   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3529   emit_int8(0x74);
3530   emit_operand(as_Register(dst_enc), src);
3531 }
3532 
3533 void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3534   assert(VM_Version::supports_avx512vlbw(), "");
3535   assert(is_vector_masking(), "");    // For stub code use only
3536   InstructionMark im(this);
3537   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3538   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3539   attributes.reset_is_clear_context();
3540   attributes.set_embedded_opmask_register_specifier(mask);
3541   attributes.set_is_evex_instruction();
3542   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3543   emit_int8(0x74);
3544   emit_operand(as_Register(kdst->encoding()), src);
3545 }
3546 
3547 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3548 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3549   assert(VM_Version::supports_sse2(), "");
3550   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3551   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3552   emit_int8(0x75);
3553   emit_int8((unsigned char)(0xC0 | encode));
3554 }
3555 
3556 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3557 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3558   assert(VM_Version::supports_avx(), "");
3559   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3560   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3561   emit_int8(0x75);
3562   emit_int8((unsigned char)(0xC0 | encode));
3563 }
3564 
3565 // In this context, kdst is written with the mask used to process the equal components
3566 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3567   assert(VM_Version::supports_avx512bw(), "");
3568   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3569   attributes.set_is_evex_instruction();
3570   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3571   emit_int8(0x75);
3572   emit_int8((unsigned char)(0xC0 | encode));
3573 }
3574 
3575 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3576   assert(VM_Version::supports_avx512bw(), "");
3577   InstructionMark im(this);
3578   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3579   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3580   attributes.set_is_evex_instruction();
3581   int dst_enc = kdst->encoding();
3582   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3583   emit_int8(0x75);
3584   emit_operand(as_Register(dst_enc), src);
3585 }
3586 
3587 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3588 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3589   assert(VM_Version::supports_sse2(), "");
3590   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3591   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3592   emit_int8(0x76);
3593   emit_int8((unsigned char)(0xC0 | encode));
3594 }
3595 
3596 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3597 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3598   assert(VM_Version::supports_avx(), "");
3599   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3600   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3601   emit_int8(0x76);
3602   emit_int8((unsigned char)(0xC0 | encode));
3603 }
3604 
3605 // In this context, kdst is written with the mask used to process the equal components
3606 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3607   assert(VM_Version::supports_evex(), "");
3608   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3609   attributes.set_is_evex_instruction();
3610   attributes.reset_is_clear_context();
3611   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3612   emit_int8(0x76);
3613   emit_int8((unsigned char)(0xC0 | encode));
3614 }
3615 
3616 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3617   assert(VM_Version::supports_evex(), "");
3618   InstructionMark im(this);
3619   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3620   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3621   attributes.reset_is_clear_context();
3622   attributes.set_is_evex_instruction();
3623   int dst_enc = kdst->encoding();
3624   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3625   emit_int8(0x76);
3626   emit_operand(as_Register(dst_enc), src);
3627 }
3628 
3629 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3630 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3631   assert(VM_Version::supports_sse4_1(), "");
3632   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3633   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3634   emit_int8(0x29);
3635   emit_int8((unsigned char)(0xC0 | encode));
3636 }
3637 
3638 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3639 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3640   assert(VM_Version::supports_avx(), "");
3641   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3642   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3643   emit_int8(0x29);
3644   emit_int8((unsigned char)(0xC0 | encode));
3645 }
3646 
3647 // In this context, kdst is written with the mask used to process the equal components
3648 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3649   assert(VM_Version::supports_evex(), "");
3650   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3651   attributes.reset_is_clear_context();
3652   attributes.set_is_evex_instruction();
3653   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3654   emit_int8(0x29);
3655   emit_int8((unsigned char)(0xC0 | encode));
3656 }
3657 
3658 // In this context, kdst is written with the mask used to process the equal components
3659 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3660   assert(VM_Version::supports_evex(), "");
3661   InstructionMark im(this);
3662   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3663   attributes.reset_is_clear_context();
3664   attributes.set_is_evex_instruction();
3665   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
3666   int dst_enc = kdst->encoding();
3667   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3668   emit_int8(0x29);
3669   emit_operand(as_Register(dst_enc), src);
3670 }
3671 
3672 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3673   assert(VM_Version::supports_sse2(), "");
3674   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3675   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3676   emit_int8((unsigned char)0xD7);
3677   emit_int8((unsigned char)(0xC0 | encode));
3678 }
3679 
3680 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3681   assert(VM_Version::supports_avx2(), "");
3682   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3683   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3684   emit_int8((unsigned char)0xD7);
3685   emit_int8((unsigned char)(0xC0 | encode));
3686 }
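
// For illustration: pmovmskb gathers the sign bit of each source byte into
// the low bits of dst (16 bits for the 128-bit form, 32 for vpmovmskb), so a
// 128-bit src whose bytes are all 0x80 yields dst == 0x0000FFFF.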
3687 
3688 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3689   assert(VM_Version::supports_sse4_1(), "");
3690   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3691   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3692   emit_int8(0x16);
3693   emit_int8((unsigned char)(0xC0 | encode));
3694   emit_int8(imm8);
3695 }
3696 
3697 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
3698   assert(VM_Version::supports_sse4_1(), "");
3699   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3700   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3701   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3702   emit_int8(0x16);
3703   emit_operand(src, dst);
3704   emit_int8(imm8);
3705 }
3706 
3707 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3708   assert(VM_Version::supports_sse4_1(), "");
3709   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3710   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3711   emit_int8(0x16);
3712   emit_int8((unsigned char)(0xC0 | encode));
3713   emit_int8(imm8);
3714 }
3715 
3716 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
3717   assert(VM_Version::supports_sse4_1(), "");
3718   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3719   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3720   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3721   emit_int8(0x16);
3722   emit_operand(src, dst);
3723   emit_int8(imm8);
3724 }
3725 
3726 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3727   assert(VM_Version::supports_sse2(), "");
3728   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3729   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3730   emit_int8((unsigned char)0xC5);
3731   emit_int8((unsigned char)(0xC0 | encode));
3732   emit_int8(imm8);
3733 }
3734 
3735 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
3736   assert(VM_Version::supports_sse4_1(), "");
3737   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3738   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3739   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3740   emit_int8((unsigned char)0x15);
3741   emit_operand(src, dst);
3742   emit_int8(imm8);
3743 }
3744 
3745 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
3746   assert(VM_Version::supports_sse4_1(), "");
3747   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3748   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3749   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3750   emit_int8(0x14);
3751   emit_operand(src, dst);
3752   emit_int8(imm8);
3753 }
3754 
3755 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3756   assert(VM_Version::supports_sse4_1(), "");
3757   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3758   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3759   emit_int8(0x22);
3760   emit_int8((unsigned char)(0xC0 | encode));
3761   emit_int8(imm8);
3762 }
3763 
3764 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
3765   assert(VM_Version::supports_sse4_1(), "");
3766   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3767   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3768   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3769   emit_int8(0x22);
3770   emit_operand(dst, src);
3771   emit_int8(imm8);
3772 }
3773 
3774 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3775   assert(VM_Version::supports_sse4_1(), "");
3776   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3777   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3778   emit_int8(0x22);
3779   emit_int8((unsigned char)(0xC0 | encode));
3780   emit_int8(imm8);
3781 }
3782 
3783 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
3784   assert(VM_Version::supports_sse4_1(), "");
3785   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3786   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3787   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3788   emit_int8(0x22);
3789   emit_operand(dst, src);
3790   emit_int8(imm8);
3791 }
3792 
3793 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3794   assert(VM_Version::supports_sse2(), "");
3795   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3796   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3797   emit_int8((unsigned char)0xC4);
3798   emit_int8((unsigned char)(0xC0 | encode));
3799   emit_int8(imm8);
3800 }
3801 
3802 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
3803   assert(VM_Version::supports_sse2(), "");
3804   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3805   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3806   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3807   emit_int8((unsigned char)0xC4);
3808   emit_operand(dst, src);
3809   emit_int8(imm8);
3810 }
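
// For illustration: the imm8 of pinsrw selects the destination word lane, so
// pinsrw(xmm1, rax, 3) replaces word element 3 of xmm1 (bits 63:48) with the
// low 16 bits of rax and leaves the other words intact.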
3811 
3812 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
3813   assert(VM_Version::supports_sse4_1(), "");
3814   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3815   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3816   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3817   emit_int8(0x20);
3818   emit_operand(dst, src);
3819   emit_int8(imm8);
3820 }
3821 
3822 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3823   assert(VM_Version::supports_sse4_1(), "");
3824   InstructionMark im(this);
3825   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3826   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3827   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3828   emit_int8(0x30);
3829   emit_operand(dst, src);
3830 }
3831 
3832 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3833   assert(VM_Version::supports_sse4_1(), "");
3834   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3835   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3836   emit_int8(0x30);
3837   emit_int8((unsigned char)(0xC0 | encode));
3838 }
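
// For illustration: pmovzxbw zero-extends the eight low bytes of src into the
// eight words of dst, e.g. source bytes 01 FF 02 ... become destination words
// 0x0001 0x00FF 0x0002 ...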
3839 
3840 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3841   assert(VM_Version::supports_avx(), "");
3842   InstructionMark im(this);
3843   assert(dst != xnoreg, "sanity");
3844   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3845   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3846   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3847   emit_int8(0x30);
3848   emit_operand(dst, src);
3849 }
3850 
3851 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
3852   assert(is_vector_masking(), "");
3853   assert(VM_Version::supports_avx512vlbw(), "");
3854   assert(dst != xnoreg, "sanity");
3855   InstructionMark im(this);
3856   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3857   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3858   attributes.set_embedded_opmask_register_specifier(mask);
3859   attributes.set_is_evex_instruction();
3860   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3861   emit_int8(0x30);
3862   emit_operand(dst, src);
3863 }
3864 
3865 void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
3866   assert(VM_Version::supports_avx512vlbw(), "");
3867   assert(src != xnoreg, "sanity");
3868   InstructionMark im(this);
3869   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3870   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3871   attributes.set_is_evex_instruction();
3872   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3873   emit_int8(0x30);
3874   emit_operand(src, dst);
3875 }
3876 
3877 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
3878   assert(is_vector_masking(), "");
3879   assert(VM_Version::supports_avx512vlbw(), "");
3880   assert(src != xnoreg, "sanity");
3881   InstructionMark im(this);
3882   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3883   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3884   attributes.reset_is_clear_context();
3885   attributes.set_embedded_opmask_register_specifier(mask);
3886   attributes.set_is_evex_instruction();
3887   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3888   emit_int8(0x30);
3889   emit_operand(src, dst);
3890 }
3891 
3892 // generic
3893 void Assembler::pop(Register dst) {
3894   int encode = prefix_and_encode(dst->encoding());
3895   emit_int8(0x58 | encode);
3896 }
3897 
3898 void Assembler::popcntl(Register dst, Address src) {
3899   assert(VM_Version::supports_popcnt(), "must support");
3900   InstructionMark im(this);
3901   emit_int8((unsigned char)0xF3);
3902   prefix(src, dst);
3903   emit_int8(0x0F);
3904   emit_int8((unsigned char)0xB8);
3905   emit_operand(dst, src);
3906 }
3907 
3908 void Assembler::popcntl(Register dst, Register src) {
3909   assert(VM_Version::supports_popcnt(), "must support");
3910   emit_int8((unsigned char)0xF3);
3911   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3912   emit_int8(0x0F);
3913   emit_int8((unsigned char)0xB8);
3914   emit_int8((unsigned char)(0xC0 | encode));
3915 }
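
// For illustration: popcntl(rax, rbx) emits F3 0F B8 C3 (popcnt eax, ebx);
// note that the mandatory F3 prefix is emitted before any REX prefix.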
3916 
3917 void Assembler::popf() {
3918   emit_int8((unsigned char)0x9D);
3919 }
3920 
3921 #ifndef _LP64 // no 32bit push/pop on amd64
3922 void Assembler::popl(Address dst) {
3923   // NOTE: this would adjust the stack by 8 bytes on 64-bit
3924   InstructionMark im(this);
3925   prefix(dst);
3926   emit_int8((unsigned char)0x8F);
3927   emit_operand(rax, dst);
3928 }
3929 #endif
3930 
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_int8(0x0F);
}

void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rax, src); // 0, src
}

void Assembler::prefetchr(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x0D);
  emit_operand(rax, src); // 0, src
}

void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rcx, src); // 1, src
}

void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rdx, src); // 2, src
}

void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rbx, src); // 3, src
}

void Assembler::prefetchw(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x0D);
  emit_operand(rcx, src); // 1, src
}

void Assembler::prefix(Prefix p) {
  emit_int8(p);
}

void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x00);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
         vector_len == AVX_256bit? VM_Version::supports_avx2() :
         0, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x00);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::pshufb(XMMRegister dst, Address src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x00);
  emit_operand(dst, src);
}

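// PSHUFD: each 2-bit field of the imm8 selects the source dword that is
// copied into the corresponding destination dword.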
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(mode & 0xFF);
}

void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
  assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
         vector_len == AVX_256bit? VM_Version::supports_avx2() :
         0, "");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(mode & 0xFF);
}

void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_operand(dst, src);
  emit_int8(mode & 0xFF);
}

void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(mode & 0xFF);
}

void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_operand(dst, src);
  emit_int8(mode & 0xFF);
}

void Assembler::psrldq(XMMRegister dst, int shift) {
  // Shift right 128 bit value in dst XMMRegister by shift number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
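  // XMM3 is for /3 encoding: 66 0F 73 /3 ib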
  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift);
}

void Assembler::pslldq(XMMRegister dst, int shift) {
  // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  // XMM7 is for /7 encoding: 66 0F 73 /7 ib
  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift);
}

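// PTEST writes only EFLAGS: ZF is set if (src & dst) is all zeros and CF is
// set if (src & ~dst) is all zeros.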
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_operand(dst, src);
}

void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  assert(dst != xnoreg, "sanity");
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_operand(dst, src);
}

void Assembler::vptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_int8((unsigned char)(0xC0 | encode));
}

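// The punpckl* family interleaves the low-order elements of dst and src:
// bytes (0x60), dwords (0x62) or qwords (0x6C).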
void Assembler::punpcklbw(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x60);
  emit_operand(dst, src);
}

void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x60);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::punpckldq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x62);
  emit_operand(dst, src);
}

void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x62);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_int8(0x68);
  emit_int32(imm32);
}

void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8(0x50 | encode);
}

void Assembler::pushf() {
  emit_int8((unsigned char)0x9C);
}

#ifndef _LP64 // no 32bit push/pop on amd64
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsi, src);
}
#endif

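// RCL (rotate through carry) is /2 of the shift/rotate group:
// D1 /2 for a count of one, C1 /2 ib otherwise.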
void Assembler::rcll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD0 | encode));
    emit_int8(imm8);
  }
}

void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x53);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x53);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::rdtsc() {
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0x31);
}

// copies data from [esi] to [edi] using rcx pointer-sized words
// generic
void Assembler::rep_mov() {
  emit_int8((unsigned char)0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xA5);
}

// sets rcx bytes at [edi] to the byte value in rax
void Assembler::rep_stosb() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xAA); // STOSB
}

// sets rcx pointer-sized words at [edi] to the value in rax
// generic
void Assembler::rep_stos() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
  emit_int8((unsigned char)0xAB);
}

// scans rcx pointer-sized words at [edi] for an occurrence of rax
// generic
void Assembler::repne_scan() { // repne_scan
  emit_int8((unsigned char)0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xAF);
}

#ifdef _LP64
// scans rcx 4-byte words at [edi] for an occurrence of rax
// generic
void Assembler::repne_scanl() { // repne_scan
  emit_int8((unsigned char)0xF2);
  // SCASL
  emit_int8((unsigned char)0xAF);
}
#endif

void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_int8((unsigned char)0xC3);
  } else {
    emit_int8((unsigned char)0xC2);
    emit_int16(imm16);
  }
}

void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_int8((unsigned char)0x9E);
}

void Assembler::sarl(Register dst, int imm8) {
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xF8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xF8 | encode));
    emit_int8(imm8);
  }
}

void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode));
}

void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}

void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

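// SETcc r/m8 is 0F 90+cc with a ModRM byte. The byte-register form of
// prefix_and_encode is requested so that spl/bpl/sil/dil get the REX prefix
// they require in 64-bit mode.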
void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_int8(0x0F);
  emit_int8((unsigned char)(0x90 | cc));
  emit_int8((unsigned char)(0xC0 | encode));
}

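// PALIGNR concatenates dst (high) with src (low), shifts the combined value
// right by imm8 bytes, and keeps the low 128 bits.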
void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
  assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
         vector_len == AVX_256bit? VM_Version::supports_avx2() :
         0, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

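// PBLENDW: bit i of the imm8 selects word i of the result from src (1) or
// leaves the word from dst (0).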
void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x0E);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

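// The SHA extension instructions use plain (non-VEX) encodings:
// NP 0F 3A CC /r ib for sha1rnds4 and NP 0F 38 C8..CD /r for the rest.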
void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
  emit_int8((unsigned char)0xCC);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)imm8);
}

void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xC8);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xC9);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCA);
  emit_int8((unsigned char)(0xC0 | encode));
}

// xmm0 is implicit additional source to this instruction.
void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCB);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}

void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}

void Assembler::shrl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xC1);
  emit_int8((unsigned char)(0xE8 | encode));
  emit_int8(imm8);
}

void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
}

// copies a single word from [esi] to [edi]
void Assembler::smovl() {
  emit_int8((unsigned char)0xA5);
}

void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}

void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::std() {
  emit_int8((unsigned char)0xFD);
}

void Assembler::sqrtss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}

void Assembler::stmxcsr(Address dst) {
  if (UseAVX > 0) {
    assert(VM_Version::supports_avx(), "");
    InstructionMark im(this);
    InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
    vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
    emit_int8((unsigned char)0xAE);
    emit_operand(as_Register(3), dst);
  } else {
    NOT_LP64(assert(VM_Version::supports_sse(), ""));
    InstructionMark im(this);
    prefix(dst);
    emit_int8(0x0F);
    emit_int8((unsigned char)0xAE);
    emit_operand(as_Register(3), dst);
  }
}

void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}

void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
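// (typically used where the instruction must keep a fixed length, e.g. so the
// site can be patched later)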
void Assembler::subl_imm32(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}

void Assembler::subl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}

void Assembler::subl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

void Assembler::subss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

void Assembler::testb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xF6);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}

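// testl(reg, imm32) uses the short form TEST EAX, imm32 (A9 id) when the
// destination is rax (encoding 0) and F7 /0 id otherwise.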
void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefix_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_int32(imm32);
}

void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

void Assembler::testl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x85);
  emit_operand(dst, src);
}

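// TZCNT is F3 0F BC /r. On processors without BMI1 the F3 prefix is ignored
// and this encoding executes as BSF, hence the explicit feature check.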
void Assembler::tzcntl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
  emit_int8((unsigned char)0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::tzcntq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_operand(dst, src);
}

void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_operand(dst, src);
}

void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::xabort(int8_t imm8) {
  emit_int8((unsigned char)0xC6);
  emit_int8((unsigned char)0xF8);
  emit_int8((unsigned char)(imm8 & 0xFF));
}

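// XADD exchanges its operands and then stores their sum in dst:
// 0F C0 /r for bytes, 0F C1 /r for words/dwords (with 0x66 for words).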
void Assembler::xaddb(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC0);
  emit_operand(src, dst);
}

void Assembler::xaddw(Address dst, Register src) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}

void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}

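// XBEGIN rel32 is encoded as C7 F8 cd; the displacement is relative to the
// end of the instruction, hence the -6 below (2 opcode bytes plus a 4-byte
// offset) when the abort handler is already bound.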
void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  if (abort.is_bound()) {
    address entry = target(abort);
    assert(entry != NULL, "abort entry NULL");
    intptr_t offset = entry - pc();
    emit_int8((unsigned char)0xC7);
    emit_int8((unsigned char)0xF8);
    emit_int32(offset - 6); // 2 opcode + 4 address
  } else {
    abort.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xC7);
    emit_int8((unsigned char)0xF8);
    emit_int32(0);
  }
}

void Assembler::xchgb(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_int8((unsigned char)0x86);
  emit_operand(dst, src);
}

void Assembler::xchgw(Register dst, Address src) { // xchg
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}

void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}

void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::xend() {
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0x01);
  emit_int8((unsigned char)0xD5);
}

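// XGETBV (0F 01 D0) reads the extended control register selected by ecx into
// edx:eax; with ecx == 0 it returns XCR0.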
void Assembler::xgetbv() {
  emit_int8(0x0F);
  emit_int8(0x01);
  emit_int8((unsigned char)0xD0);
}

void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}

void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

void Assembler::xorb(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x32);
  emit_operand(dst, src);
}

// AVX 3-operand scalar floating-point arithmetic instructions

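// In the three-operand scalar forms below, only the low element of dst is
// computed from nds and src; the remaining bits up to 127 are copied from
// nds and the upper vector bits are zeroed.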
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}

void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}

void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}

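// vfmadd231: dst := src1 * src2 + dst. The "231" names the operand roles:
// operands 2 and 3 are multiplied and the product is added to operand 1.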
void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB9);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB9);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}

void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}

void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

//====================VECTOR ARITHMETIC=====================================

// Floating-point vector arithmetic

void Assembler::addpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::addpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::addps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::subpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::subps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::mulpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}

void Assembler::mulps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}

void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}

5177 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5178   assert(VM_Version::supports_fma(), "");
5179   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5180   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5181   emit_int8((unsigned char)0xB8);
5182   emit_int8((unsigned char)(0xC0 | encode));
5183 }
5184 
5185 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5186   assert(VM_Version::supports_fma(), "");
5187   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5188   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5189   emit_int8((unsigned char)0xB8);
5190   emit_int8((unsigned char)(0xC0 | encode));
5191 }
5192 
5193 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5194   assert(VM_Version::supports_fma(), "");
5195   InstructionMark im(this);
5196   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5197   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5198   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5199   emit_int8((unsigned char)0xB8);
5200   emit_operand(dst, src2);
5201 }
5202 
5203 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5204   assert(VM_Version::supports_fma(), "");
5205   InstructionMark im(this);
5206   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5207   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5208   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5209   emit_int8((unsigned char)0xB8);
5210   emit_operand(dst, src2);
5211 }
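
// The "231" forms above follow the Intel operand-numbering convention:
// vfmadd231pd dst, src1, src2 computes dst = src1 * src2 + dst
// element-wise, with a single rounding (fused multiply-add). For example,
// lanes holding (dst, src1, src2) = (1.0, 2.0, 3.0) produce 2.0 * 3.0 + 1.0 = 7.0.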

void Assembler::divpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::divps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}

void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}

void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}

void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}

void Assembler::andpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}

void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}

void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}

void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}

void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x15);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x14);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}

void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}

void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}

void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}
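
// Typical uses of the packed and/xor forms above are float bit tricks:
// xorps(dst, dst) zeroes a register, xorpd with a 0x8000000000000000
// mask flips the sign of each double, and andpd with a
// 0x7FFFFFFFFFFFFFFF mask computes the absolute value.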

// Integer vector arithmetic
void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x01);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x02);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::paddb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::paddw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::paddd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::paddd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_operand(dst, src);
}

void Assembler::paddq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD4);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x01);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x02);
  emit_int8((unsigned char)(0xC0 | encode));
}
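
// Semantics of the horizontal adds above, per the Intel SDM: adjacent
// element pairs are summed, with the sums from dst packed into the low
// half of the result and the sums from src into the high half. For
// phaddd with dst = [a0 a1 a2 a3] and src = [b0 b1 b2 b3], the result
// is [a0+a1, a2+a3, b0+b1, b2+b3].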

void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD4);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFC);
  emit_operand(dst, src);
}

void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFD);
  emit_operand(dst, src);
}

void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_operand(dst, src);
}

void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD4);
  emit_operand(dst, src);
}

void Assembler::psubb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF8);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::psubw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF9);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::psubd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFA);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::psubq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFB);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF8);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF9);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFA);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFB);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF8);
  emit_operand(dst, src);
}

void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF9);
  emit_operand(dst, src);
}

void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFA);
  emit_operand(dst, src);
}

void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFB);
  emit_operand(dst, src);
}

void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD5);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD5);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD5);
  emit_operand(dst, src);
}

void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_operand(dst, src);
}

void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_is_evex_instruction();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_operand(dst, src);
}
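
// Unlike pmullw (SSE2) and pmulld (SSE4.1), there is no legacy or VEX
// encoding of a packed 64x64->64 multiply: vpmullq exists only as an
// EVEX instruction in AVX-512DQ, which is why the forms above require
// UseAVX > 2 and force set_is_evex_instruction().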

// Shift packed integers left by specified number of bits.
void Assembler::psllw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::pslld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::psllq(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
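
// Worked example of the /6 encoding used above: the immediate shift
// forms have no register source, so the ModRM reg field carries the
// opcode extension instead, and passing xmm6 supplies that /6. Thus
// psllw(xmm1, 3) emits 66 0F 71 F1 03, where F1 = 0xC0 | (6 << 3) | 1.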

void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF1);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF1);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
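
// For the xmm-count shift forms above, the count is taken from the low
// 64 bits of the count register and is not masked: per the Intel SDM, a
// count larger than the element width zeroes every element. They differ
// from the immediate forms only in where the count comes from.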

// Shift packed integers logically right by specified number of bits.
void Assembler::psrlw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::psrld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse this with the SSE2 psrldq instruction, which shifts
  // the whole 128-bit value in the xmm register by a number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
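
// Illustration of the distinction: psrlq(xmm1, 8) shifts each 64-bit
// lane right by 8 bits and emits 66 0F 73 D1 08, while psrldq shifts
// the whole 128-bit register right by 8 bytes and uses the same
// 66 0F 73 opcode with extension /3 instead of /2.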

void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD1);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD1);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xC0 | encode));
}

// Shift packed integers arithmetically right by specified number of bits.
void Assembler::psraw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::psrad(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
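
// The arithmetic shifts replicate the sign bit into the vacated
// positions: psrad by 4 turns 0x80000000 into 0xF8000000, where the
// logical psrld would give 0x08000000. Note there is no packed 64-bit
// arithmetic shift in SSE/AVX; vpsraq exists only as an AVX-512
// instruction.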

void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE1);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE1);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE2);
  emit_int8((unsigned char)(0xC0 | encode));
}


// Logical operations on packed integers
void Assembler::pand(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);
  emit_operand(dst, src);
}

void Assembler::pandn(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}
6142 
6143 void Assembler::por(XMMRegister dst, XMMRegister src) {
6144   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6145   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6146   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6147   emit_int8((unsigned char)0xEB);
6148   emit_int8((unsigned char)(0xC0 | encode));
6149 }
6150 
6151 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6152   assert(UseAVX > 0, "requires some form of AVX");
6153   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6154   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6155   emit_int8((unsigned char)0xEB);
6156   emit_int8((unsigned char)(0xC0 | encode));
6157 }
6158 
6159 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6160   assert(UseAVX > 0, "requires some form of AVX");
6161   InstructionMark im(this);
6162   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6163   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6164   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6165   emit_int8((unsigned char)0xEB);
6166   emit_operand(dst, src);
6167 }
6168 
6169 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
6170   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6171   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6172   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6173   emit_int8((unsigned char)0xEF);
6174   emit_int8((unsigned char)(0xC0 | encode));
6175 }
6176 
6177 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6178   assert(UseAVX > 0, "requires some form of AVX");
6179   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6180   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6181   emit_int8((unsigned char)0xEF);
6182   emit_int8((unsigned char)(0xC0 | encode));
6183 }
6184 
6185 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6186   assert(UseAVX > 0, "requires some form of AVX");
6187   InstructionMark im(this);
6188   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6189   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6190   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6191   emit_int8((unsigned char)0xEF);
6192   emit_operand(dst, src);
6193 }
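
// Usage note (illustrative): xor-ing a register with itself is the usual
// vector zeroing idiom; vpxor(xmm0, xmm0, xmm0, AVX_128bit) assembles to
//   C5 F9 EF C0           vpxor xmm0, xmm0, xmm0
// which zeroes the full register and breaks any dependency on its old value.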
6194 
6195 
6196 // vinserti forms
6197 
6198 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6199   assert(VM_Version::supports_avx2(), "");
6200   assert(imm8 <= 0x01, "imm8: %u", imm8);
6201   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6202   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6203   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6204   emit_int8(0x38);
6205   emit_int8((unsigned char)(0xC0 | encode));
6206   // 0x00 - insert into lower 128 bits
6207   // 0x01 - insert into upper 128 bits
6208   emit_int8(imm8 & 0x01);
6209 }
6210 
6211 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6212   assert(VM_Version::supports_avx2(), "");
6213   assert(dst != xnoreg, "sanity");
6214   assert(imm8 <= 0x01, "imm8: %u", imm8);
6215   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6216   InstructionMark im(this);
6217   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6218   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6219   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6220   emit_int8(0x38);
6221   emit_operand(dst, src);
6222   // 0x00 - insert into lower 128 bits
6223   // 0x01 - insert into upper 128 bits
6224   emit_int8(imm8 & 0x01);
6225 }
6226 
6227 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6228   assert(VM_Version::supports_evex(), "");
6229   assert(imm8 <= 0x03, "imm8: %u", imm8);
6230   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6231   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6232   emit_int8(0x38);
6233   emit_int8((unsigned char)(0xC0 | encode));
6234   // 0x00 - insert into q0 128 bits (0..127)
6235   // 0x01 - insert into q1 128 bits (128..255)
6236   // 0x02 - insert into q2 128 bits (256..383)
6237   // 0x03 - insert into q3 128 bits (384..511)
6238   emit_int8(imm8 & 0x03);
6239 }
6240 
6241 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6242   assert(VM_Version::supports_avx(), "");
6243   assert(dst != xnoreg, "sanity");
6244   assert(imm8 <= 0x03, "imm8: %u", imm8);
6245   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6246   InstructionMark im(this);
6247   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6248   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6249   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  // note: the vinsertf-form opcode (0x18) is used here; it stays legal on
  // AVX1-only parts (the assert above only requires AVX) and moves the same
  // 128 bits as the integer-domain 0x38 form
  emit_int8(0x18);
6251   emit_operand(dst, src);
6252   // 0x00 - insert into q0 128 bits (0..127)
6253   // 0x01 - insert into q1 128 bits (128..255)
6254   // 0x02 - insert into q2 128 bits (256..383)
6255   // 0x03 - insert into q3 128 bits (384..511)
6256   emit_int8(imm8 & 0x03);
6257 }
6258 
6259 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6260   assert(VM_Version::supports_evex(), "");
6261   assert(imm8 <= 0x01, "imm8: %u", imm8);
6262   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6263   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3A);
6265   emit_int8((unsigned char)(0xC0 | encode));
6266   // 0x00 - insert into lower 256 bits
6267   // 0x01 - insert into upper 256 bits
6268   emit_int8(imm8 & 0x01);
6269 }
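
// Usage sketch (illustrative): a 512-bit value can be built out of 128-bit
// pieces by inserting into successive lanes (the xmm names denote the full
// EVEX register here, since the register form forces AVX_512bit):
//   vinserti32x4(xmm0, xmm0, xmm1, 0);   // bits 127:0
//   vinserti32x4(xmm0, xmm0, xmm2, 1);   // bits 255:128
//   vinserti32x4(xmm0, xmm0, xmm3, 2);   // bits 383:256
//   vinserti32x4(xmm0, xmm0, xmm4, 3);   // bits 511:384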
6270 
6271 
6272 // vinsertf forms
6273 
6274 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6275   assert(VM_Version::supports_avx(), "");
6276   assert(imm8 <= 0x01, "imm8: %u", imm8);
6277   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6278   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6279   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6280   emit_int8(0x18);
6281   emit_int8((unsigned char)(0xC0 | encode));
6282   // 0x00 - insert into lower 128 bits
6283   // 0x01 - insert into upper 128 bits
6284   emit_int8(imm8 & 0x01);
6285 }
6286 
6287 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6288   assert(VM_Version::supports_avx(), "");
6289   assert(dst != xnoreg, "sanity");
6290   assert(imm8 <= 0x01, "imm8: %u", imm8);
6291   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6292   InstructionMark im(this);
6293   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6294   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6295   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6296   emit_int8(0x18);
6297   emit_operand(dst, src);
6298   // 0x00 - insert into lower 128 bits
6299   // 0x01 - insert into upper 128 bits
6300   emit_int8(imm8 & 0x01);
6301 }
6302 
6303 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6304   assert(VM_Version::supports_evex(), "");
6305   assert(imm8 <= 0x03, "imm8: %u", imm8);
6306   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6307   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6308   emit_int8(0x18);
6309   emit_int8((unsigned char)(0xC0 | encode));
6310   // 0x00 - insert into q0 128 bits (0..127)
6311   // 0x01 - insert into q1 128 bits (128..255)
6312   // 0x02 - insert into q2 128 bits (256..383)
6313   // 0x03 - insert into q3 128 bits (384..511)
6314   emit_int8(imm8 & 0x03);
6315 }
6316 
6317 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6318   assert(VM_Version::supports_avx(), "");
6319   assert(dst != xnoreg, "sanity");
6320   assert(imm8 <= 0x03, "imm8: %u", imm8);
6321   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6322   InstructionMark im(this);
6323   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6324   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6325   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6326   emit_int8(0x18);
6327   emit_operand(dst, src);
6328   // 0x00 - insert into q0 128 bits (0..127)
6329   // 0x01 - insert into q1 128 bits (128..255)
6330   // 0x02 - insert into q2 128 bits (256..383)
6331   // 0x03 - insert into q3 128 bits (384..511)
6332   emit_int8(imm8 & 0x03);
6333 }
6334 
6335 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6336   assert(VM_Version::supports_evex(), "");
6337   assert(imm8 <= 0x01, "imm8: %u", imm8);
6338   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6339   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6340   emit_int8(0x1A);
6341   emit_int8((unsigned char)(0xC0 | encode));
6342   // 0x00 - insert into lower 256 bits
6343   // 0x01 - insert into upper 256 bits
6344   emit_int8(imm8 & 0x01);
6345 }
6346 
6347 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6348   assert(VM_Version::supports_evex(), "");
6349   assert(dst != xnoreg, "sanity");
6350   assert(imm8 <= 0x01, "imm8: %u", imm8);
6351   InstructionMark im(this);
6352   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6353   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
6354   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6355   emit_int8(0x1A);
6356   emit_operand(dst, src);
6357   // 0x00 - insert into lower 256 bits
6358   // 0x01 - insert into upper 256 bits
6359   emit_int8(imm8 & 0x01);
6360 }
6361 
6362 
6363 // vextracti forms
6364 
6365 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx2(), "");
6367   assert(imm8 <= 0x01, "imm8: %u", imm8);
6368   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6369   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6370   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6371   emit_int8(0x39);
6372   emit_int8((unsigned char)(0xC0 | encode));
6373   // 0x00 - extract from lower 128 bits
6374   // 0x01 - extract from upper 128 bits
6375   emit_int8(imm8 & 0x01);
6376 }
6377 
6378 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
6379   assert(VM_Version::supports_avx2(), "");
6380   assert(src != xnoreg, "sanity");
6381   assert(imm8 <= 0x01, "imm8: %u", imm8);
6382   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6383   InstructionMark im(this);
6384   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6385   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6386   attributes.reset_is_clear_context();
6387   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6388   emit_int8(0x39);
6389   emit_operand(src, dst);
6390   // 0x00 - extract from lower 128 bits
6391   // 0x01 - extract from upper 128 bits
6392   emit_int8(imm8 & 0x01);
6393 }
6394 
6395 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6396   assert(VM_Version::supports_avx(), "");
6397   assert(imm8 <= 0x03, "imm8: %u", imm8);
6398   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6399   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6400   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6401   emit_int8(0x39);
6402   emit_int8((unsigned char)(0xC0 | encode));
6403   // 0x00 - extract from bits 127:0
6404   // 0x01 - extract from bits 255:128
6405   // 0x02 - extract from bits 383:256
6406   // 0x03 - extract from bits 511:384
6407   emit_int8(imm8 & 0x03);
6408 }
6409 
6410 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
6411   assert(VM_Version::supports_evex(), "");
6412   assert(src != xnoreg, "sanity");
6413   assert(imm8 <= 0x03, "imm8: %u", imm8);
6414   InstructionMark im(this);
6415   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6416   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6417   attributes.reset_is_clear_context();
6418   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6419   emit_int8(0x39);
6420   emit_operand(src, dst);
6421   // 0x00 - extract from bits 127:0
6422   // 0x01 - extract from bits 255:128
6423   // 0x02 - extract from bits 383:256
6424   // 0x03 - extract from bits 511:384
6425   emit_int8(imm8 & 0x03);
6426 }
6427 
6428 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6429   assert(VM_Version::supports_avx512dq(), "");
6430   assert(imm8 <= 0x03, "imm8: %u", imm8);
6431   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6432   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6433   emit_int8(0x39);
6434   emit_int8((unsigned char)(0xC0 | encode));
6435   // 0x00 - extract from bits 127:0
6436   // 0x01 - extract from bits 255:128
6437   // 0x02 - extract from bits 383:256
6438   // 0x03 - extract from bits 511:384
6439   emit_int8(imm8 & 0x03);
6440 }
6441 
6442 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6443   assert(VM_Version::supports_evex(), "");
6444   assert(imm8 <= 0x01, "imm8: %u", imm8);
6445   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6446   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6447   emit_int8(0x3B);
6448   emit_int8((unsigned char)(0xC0 | encode));
6449   // 0x00 - extract from lower 256 bits
6450   // 0x01 - extract from upper 256 bits
6451   emit_int8(imm8 & 0x01);
6452 }
6453 
6454 
6455 // vextractf forms
6456 
6457 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6458   assert(VM_Version::supports_avx(), "");
6459   assert(imm8 <= 0x01, "imm8: %u", imm8);
6460   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6461   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6462   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6463   emit_int8(0x19);
6464   emit_int8((unsigned char)(0xC0 | encode));
6465   // 0x00 - extract from lower 128 bits
6466   // 0x01 - extract from upper 128 bits
6467   emit_int8(imm8 & 0x01);
6468 }
6469 
6470 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
6471   assert(VM_Version::supports_avx(), "");
6472   assert(src != xnoreg, "sanity");
6473   assert(imm8 <= 0x01, "imm8: %u", imm8);
6474   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6475   InstructionMark im(this);
6476   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6477   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6478   attributes.reset_is_clear_context();
6479   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6480   emit_int8(0x19);
6481   emit_operand(src, dst);
6482   // 0x00 - extract from lower 128 bits
6483   // 0x01 - extract from upper 128 bits
6484   emit_int8(imm8 & 0x01);
6485 }
6486 
6487 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6488   assert(VM_Version::supports_avx(), "");
6489   assert(imm8 <= 0x03, "imm8: %u", imm8);
6490   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6491   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6492   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6493   emit_int8(0x19);
6494   emit_int8((unsigned char)(0xC0 | encode));
6495   // 0x00 - extract from bits 127:0
6496   // 0x01 - extract from bits 255:128
6497   // 0x02 - extract from bits 383:256
6498   // 0x03 - extract from bits 511:384
6499   emit_int8(imm8 & 0x03);
6500 }
6501 
6502 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
6503   assert(VM_Version::supports_evex(), "");
6504   assert(src != xnoreg, "sanity");
6505   assert(imm8 <= 0x03, "imm8: %u", imm8);
6506   InstructionMark im(this);
6507   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6508   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6509   attributes.reset_is_clear_context();
6510   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6511   emit_int8(0x19);
6512   emit_operand(src, dst);
6513   // 0x00 - extract from bits 127:0
6514   // 0x01 - extract from bits 255:128
6515   // 0x02 - extract from bits 383:256
6516   // 0x03 - extract from bits 511:384
6517   emit_int8(imm8 & 0x03);
6518 }
6519 
6520 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6521   assert(VM_Version::supports_avx512dq(), "");
6522   assert(imm8 <= 0x03, "imm8: %u", imm8);
6523   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6524   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6525   emit_int8(0x19);
6526   emit_int8((unsigned char)(0xC0 | encode));
6527   // 0x00 - extract from bits 127:0
6528   // 0x01 - extract from bits 255:128
6529   // 0x02 - extract from bits 383:256
6530   // 0x03 - extract from bits 511:384
6531   emit_int8(imm8 & 0x03);
6532 }
6533 
6534 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6535   assert(VM_Version::supports_evex(), "");
6536   assert(imm8 <= 0x01, "imm8: %u", imm8);
6537   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6538   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6539   emit_int8(0x1B);
6540   emit_int8((unsigned char)(0xC0 | encode));
6541   // 0x00 - extract from lower 256 bits
6542   // 0x01 - extract from upper 256 bits
6543   emit_int8(imm8 & 0x01);
6544 }
6545 
6546 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
6547   assert(VM_Version::supports_evex(), "");
6548   assert(src != xnoreg, "sanity");
6549   assert(imm8 <= 0x01, "imm8: %u", imm8);
6550   InstructionMark im(this);
6551   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6552   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
6553   attributes.reset_is_clear_context();
6554   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6555   emit_int8(0x1B);
6556   emit_operand(src, dst);
6557   // 0x00 - extract from lower 256 bits
6558   // 0x01 - extract from upper 256 bits
6559   emit_int8(imm8 & 0x01);
6560 }
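
// Usage sketch (illustrative): the memory-destination forms spill one lane
// directly, without staging it in a second register, e.g.
//   vextractf128(Address(rsp, 16), xmm0, 1);   // store bits 255:128 of ymm0
// and a later vinsertf128 from the same stack slot restores that lane.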
6561 
6562 
6563 // legacy word/dword replicate
6564 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
6565   assert(VM_Version::supports_avx2(), "");
6566   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6567   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6568   emit_int8(0x79);
6569   emit_int8((unsigned char)(0xC0 | encode));
6570 }
6571 
6572 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
6573   assert(VM_Version::supports_avx2(), "");
6574   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6575   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6576   emit_int8(0x58);
6577   emit_int8((unsigned char)(0xC0 | encode));
6578 }
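
// Usage note (illustrative): these AVX2 forms replicate the low element of
// the xmm source across the 256-bit destination, e.g. vpbroadcastw(xmm0, xmm1)
// copies xmm1[15:0] into all sixteen word lanes of ymm0.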
6579 
6580 
6581 // xmm/mem sourced byte/word/dword/qword replicate
6582 
6583 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6584 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
6585   assert(VM_Version::supports_evex(), "");
6586   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6587   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6588   emit_int8(0x78);
6589   emit_int8((unsigned char)(0xC0 | encode));
6590 }
6591 
6592 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
6593   assert(VM_Version::supports_evex(), "");
6594   assert(dst != xnoreg, "sanity");
6595   InstructionMark im(this);
6596   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6597   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
6598   // swap src<->dst for encoding
6599   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6600   emit_int8(0x78);
6601   emit_operand(dst, src);
6602 }
6603 
6604 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6605 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
6606   assert(VM_Version::supports_evex(), "");
6607   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6608   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6609   emit_int8(0x79);
6610   emit_int8((unsigned char)(0xC0 | encode));
6611 }
6612 
6613 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
6614   assert(VM_Version::supports_evex(), "");
6615   assert(dst != xnoreg, "sanity");
6616   InstructionMark im(this);
6617   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6618   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
6619   // swap src<->dst for encoding
6620   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6621   emit_int8(0x79);
6622   emit_operand(dst, src);
6623 }
6624 
6625 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6626 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
6627   assert(VM_Version::supports_evex(), "");
6628   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6629   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6630   emit_int8(0x58);
6631   emit_int8((unsigned char)(0xC0 | encode));
6632 }
6633 
6634 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
6635   assert(VM_Version::supports_evex(), "");
6636   assert(dst != xnoreg, "sanity");
6637   InstructionMark im(this);
6638   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6639   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6640   // swap src<->dst for encoding
6641   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6642   emit_int8(0x58);
6643   emit_operand(dst, src);
6644 }
6645 
6646 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6647 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
6648   assert(VM_Version::supports_evex(), "");
6649   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6650   attributes.set_rex_vex_w_reverted();
6651   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6652   emit_int8(0x59);
6653   emit_int8((unsigned char)(0xC0 | encode));
6654 }
6655 
6656 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
6657   assert(VM_Version::supports_evex(), "");
6658   assert(dst != xnoreg, "sanity");
6659   InstructionMark im(this);
6660   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6661   attributes.set_rex_vex_w_reverted();
6662   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6663   // swap src<->dst for encoding
6664   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6665   emit_int8(0x59);
6666   emit_operand(dst, src);
6667 }
6668 
6669 
6670 // scalar single/double precision replicate
6671 
6672 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
6673 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
6674   assert(VM_Version::supports_evex(), "");
6675   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6676   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6677   emit_int8(0x18);
6678   emit_int8((unsigned char)(0xC0 | encode));
6679 }
6680 
6681 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
6682   assert(VM_Version::supports_evex(), "");
6683   assert(dst != xnoreg, "sanity");
6684   InstructionMark im(this);
6685   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6686   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6687   // swap src<->dst for encoding
6688   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6689   emit_int8(0x18);
6690   emit_operand(dst, src);
6691 }
6692 
6693 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
6694 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
6695   assert(VM_Version::supports_evex(), "");
6696   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6697   attributes.set_rex_vex_w_reverted();
6698   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6699   emit_int8(0x19);
6700   emit_int8((unsigned char)(0xC0 | encode));
6701 }
6702 
6703 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
6704   assert(VM_Version::supports_evex(), "");
6705   assert(dst != xnoreg, "sanity");
6706   InstructionMark im(this);
6707   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6708   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6709   attributes.set_rex_vex_w_reverted();
6710   // swap src<->dst for encoding
6711   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6712   emit_int8(0x19);
6713   emit_operand(dst, src);
6714 }
6715 
6716 
6717 // gpr source broadcast forms
6718 
6719 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6720 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
6721   assert(VM_Version::supports_evex(), "");
6722   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6723   attributes.set_is_evex_instruction();
6724   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6725   emit_int8(0x7A);
6726   emit_int8((unsigned char)(0xC0 | encode));
6727 }
6728 
6729 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6730 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
6731   assert(VM_Version::supports_evex(), "");
6732   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6733   attributes.set_is_evex_instruction();
6734   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6735   emit_int8(0x7B);
6736   emit_int8((unsigned char)(0xC0 | encode));
6737 }
6738 
6739 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6740 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
6741   assert(VM_Version::supports_evex(), "");
6742   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6743   attributes.set_is_evex_instruction();
6744   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6745   emit_int8(0x7C);
6746   emit_int8((unsigned char)(0xC0 | encode));
6747 }
6748 
6749 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6750 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
6751   assert(VM_Version::supports_evex(), "");
6752   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6753   attributes.set_is_evex_instruction();
6754   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6755   emit_int8(0x7C);
6756   emit_int8((unsigned char)(0xC0 | encode));
6757 }
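
// Usage note (illustrative): the GPR-sourced forms splat a general register
// without a round trip through memory or an xmm register; for example
// evpbroadcastd(xmm0, rax, AVX_512bit) replicates eax into all sixteen dword
// lanes.  These encodings (EVEX 7A/7B/7C) exist only as EVEX instructions.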
6758 
6759 
6760 // Carry-Less Multiplication Quadword
6761 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
6762   assert(VM_Version::supports_clmul(), "");
6763   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6764   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6765   emit_int8(0x44);
6766   emit_int8((unsigned char)(0xC0 | encode));
6767   emit_int8((unsigned char)mask);
6768 }
6769 
6770 // Carry-Less Multiplication Quadword
6771 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
6772   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
6773   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6774   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6775   emit_int8(0x44);
6776   emit_int8((unsigned char)(0xC0 | encode));
6777   emit_int8((unsigned char)mask);
6778 }
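
// Note on the mask operand: bit 0 of the immediate selects the high (1) or
// low (0) qword of the first source and bit 4 selects the qword of the second,
// so mask 0x00 multiplies the two low qwords and 0x11 the two high qwords,
// each yielding a 128-bit carry-less product.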
6779 
6780 void Assembler::vzeroupper() {
6781   if (VM_Version::supports_vzeroupper()) {
6782     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
6783     (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6784     emit_int8(0x77);
6785   }
6786 }
6787 
6788 #ifndef _LP64
6789 // 32bit only pieces of the assembler
6790 
6791 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
6792   // NO PREFIX AS NEVER 64BIT
6793   InstructionMark im(this);
6794   emit_int8((unsigned char)0x81);
6795   emit_int8((unsigned char)(0xF8 | src1->encoding()));
6796   emit_data(imm32, rspec, 0);
6797 }
6798 
6799 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
6801   InstructionMark im(this);
6802   emit_int8((unsigned char)0x81);
6803   emit_operand(rdi, src1);
6804   emit_data(imm32, rspec, 0);
6805 }
6806 
6807 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
6808 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
6809 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
6810 void Assembler::cmpxchg8(Address adr) {
6811   InstructionMark im(this);
6812   emit_int8(0x0F);
6813   emit_int8((unsigned char)0xC7);
6814   emit_operand(rcx, adr);
6815 }
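
// Usage sketch (illustrative): callers wanting the atomic form emit the lock
// prefix themselves, along the lines of
//   lock();
//   cmpxchg8(Address(reg, 0));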
6816 
6817 void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_int8(0x48 | dst->encoding());
6820 }
6821 
6822 #endif // _LP64
6823 
6824 // 64bit typically doesn't use the x87 but needs to for the trig funcs
6825 
6826 void Assembler::fabs() {
6827   emit_int8((unsigned char)0xD9);
6828   emit_int8((unsigned char)0xE1);
6829 }
6830 
6831 void Assembler::fadd(int i) {
6832   emit_farith(0xD8, 0xC0, i);
6833 }
6834 
6835 void Assembler::fadd_d(Address src) {
6836   InstructionMark im(this);
6837   emit_int8((unsigned char)0xDC);
6838   emit_operand32(rax, src);
6839 }
6840 
6841 void Assembler::fadd_s(Address src) {
6842   InstructionMark im(this);
6843   emit_int8((unsigned char)0xD8);
6844   emit_operand32(rax, src);
6845 }
6846 
6847 void Assembler::fadda(int i) {
6848   emit_farith(0xDC, 0xC0, i);
6849 }
6850 
6851 void Assembler::faddp(int i) {
6852   emit_farith(0xDE, 0xC0, i);
6853 }
6854 
6855 void Assembler::fchs() {
6856   emit_int8((unsigned char)0xD9);
6857   emit_int8((unsigned char)0xE0);
6858 }
6859 
6860 void Assembler::fcom(int i) {
6861   emit_farith(0xD8, 0xD0, i);
6862 }
6863 
6864 void Assembler::fcomp(int i) {
6865   emit_farith(0xD8, 0xD8, i);
6866 }
6867 
6868 void Assembler::fcomp_d(Address src) {
6869   InstructionMark im(this);
6870   emit_int8((unsigned char)0xDC);
6871   emit_operand32(rbx, src);
6872 }
6873 
6874 void Assembler::fcomp_s(Address src) {
6875   InstructionMark im(this);
6876   emit_int8((unsigned char)0xD8);
6877   emit_operand32(rbx, src);
6878 }
6879 
6880 void Assembler::fcompp() {
6881   emit_int8((unsigned char)0xDE);
6882   emit_int8((unsigned char)0xD9);
6883 }
6884 
6885 void Assembler::fcos() {
6886   emit_int8((unsigned char)0xD9);
6887   emit_int8((unsigned char)0xFF);
6888 }
6889 
6890 void Assembler::fdecstp() {
6891   emit_int8((unsigned char)0xD9);
6892   emit_int8((unsigned char)0xF6);
6893 }
6894 
6895 void Assembler::fdiv(int i) {
6896   emit_farith(0xD8, 0xF0, i);
6897 }
6898 
6899 void Assembler::fdiv_d(Address src) {
6900   InstructionMark im(this);
6901   emit_int8((unsigned char)0xDC);
6902   emit_operand32(rsi, src);
6903 }
6904 
6905 void Assembler::fdiv_s(Address src) {
6906   InstructionMark im(this);
6907   emit_int8((unsigned char)0xD8);
6908   emit_operand32(rsi, src);
6909 }
6910 
6911 void Assembler::fdiva(int i) {
6912   emit_farith(0xDC, 0xF8, i);
6913 }
6914 
6915 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
6916 //       is erroneous for some of the floating-point instructions below.
6917 
6918 void Assembler::fdivp(int i) {
6919   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
6920 }
6921 
6922 void Assembler::fdivr(int i) {
6923   emit_farith(0xD8, 0xF8, i);
6924 }
6925 
6926 void Assembler::fdivr_d(Address src) {
6927   InstructionMark im(this);
6928   emit_int8((unsigned char)0xDC);
6929   emit_operand32(rdi, src);
6930 }
6931 
6932 void Assembler::fdivr_s(Address src) {
6933   InstructionMark im(this);
6934   emit_int8((unsigned char)0xD8);
6935   emit_operand32(rdi, src);
6936 }
6937 
6938 void Assembler::fdivra(int i) {
6939   emit_farith(0xDC, 0xF0, i);
6940 }
6941 
6942 void Assembler::fdivrp(int i) {
6943   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
6944 }
6945 
6946 void Assembler::ffree(int i) {
6947   emit_farith(0xDD, 0xC0, i);
6948 }
6949 
6950 void Assembler::fild_d(Address adr) {
6951   InstructionMark im(this);
6952   emit_int8((unsigned char)0xDF);
6953   emit_operand32(rbp, adr);
6954 }
6955 
6956 void Assembler::fild_s(Address adr) {
6957   InstructionMark im(this);
6958   emit_int8((unsigned char)0xDB);
6959   emit_operand32(rax, adr);
6960 }
6961 
6962 void Assembler::fincstp() {
6963   emit_int8((unsigned char)0xD9);
6964   emit_int8((unsigned char)0xF7);
6965 }
6966 
6967 void Assembler::finit() {
6968   emit_int8((unsigned char)0x9B);
6969   emit_int8((unsigned char)0xDB);
6970   emit_int8((unsigned char)0xE3);
6971 }
6972 
6973 void Assembler::fist_s(Address adr) {
6974   InstructionMark im(this);
6975   emit_int8((unsigned char)0xDB);
6976   emit_operand32(rdx, adr);
6977 }
6978 
6979 void Assembler::fistp_d(Address adr) {
6980   InstructionMark im(this);
6981   emit_int8((unsigned char)0xDF);
6982   emit_operand32(rdi, adr);
6983 }
6984 
6985 void Assembler::fistp_s(Address adr) {
6986   InstructionMark im(this);
6987   emit_int8((unsigned char)0xDB);
6988   emit_operand32(rbx, adr);
6989 }
6990 
6991 void Assembler::fld1() {
6992   emit_int8((unsigned char)0xD9);
6993   emit_int8((unsigned char)0xE8);
6994 }
6995 
6996 void Assembler::fld_d(Address adr) {
6997   InstructionMark im(this);
6998   emit_int8((unsigned char)0xDD);
6999   emit_operand32(rax, adr);
7000 }
7001 
7002 void Assembler::fld_s(Address adr) {
7003   InstructionMark im(this);
7004   emit_int8((unsigned char)0xD9);
7005   emit_operand32(rax, adr);
7006 }
7007 
7008 
7009 void Assembler::fld_s(int index) {
7010   emit_farith(0xD9, 0xC0, index);
7011 }
7012 
7013 void Assembler::fld_x(Address adr) {
7014   InstructionMark im(this);
7015   emit_int8((unsigned char)0xDB);
7016   emit_operand32(rbp, adr);
7017 }
7018 
7019 void Assembler::fldcw(Address src) {
7020   InstructionMark im(this);
7021   emit_int8((unsigned char)0xD9);
7022   emit_operand32(rbp, src);
7023 }
7024 
7025 void Assembler::fldenv(Address src) {
7026   InstructionMark im(this);
7027   emit_int8((unsigned char)0xD9);
7028   emit_operand32(rsp, src);
7029 }
7030 
7031 void Assembler::fldlg2() {
7032   emit_int8((unsigned char)0xD9);
7033   emit_int8((unsigned char)0xEC);
7034 }
7035 
7036 void Assembler::fldln2() {
7037   emit_int8((unsigned char)0xD9);
7038   emit_int8((unsigned char)0xED);
7039 }
7040 
7041 void Assembler::fldz() {
7042   emit_int8((unsigned char)0xD9);
7043   emit_int8((unsigned char)0xEE);
7044 }
7045 
7046 void Assembler::flog() {
7047   fldln2();
7048   fxch();
7049   fyl2x();
7050 }
7051 
7052 void Assembler::flog10() {
7053   fldlg2();
7054   fxch();
7055   fyl2x();
7056 }
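
// Both composites lean on fyl2x, which computes ST(1) * log2(ST(0)):
//   flog   pushes ln(2),    giving ln(2)    * log2(x) = ln(x)
//   flog10 pushes log10(2), giving log10(2) * log2(x) = log10(x)
// with fxch moving x back to the top of the stack before fyl2x runs.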
7057 
7058 void Assembler::fmul(int i) {
7059   emit_farith(0xD8, 0xC8, i);
7060 }
7061 
7062 void Assembler::fmul_d(Address src) {
7063   InstructionMark im(this);
7064   emit_int8((unsigned char)0xDC);
7065   emit_operand32(rcx, src);
7066 }
7067 
7068 void Assembler::fmul_s(Address src) {
7069   InstructionMark im(this);
7070   emit_int8((unsigned char)0xD8);
7071   emit_operand32(rcx, src);
7072 }
7073 
7074 void Assembler::fmula(int i) {
7075   emit_farith(0xDC, 0xC8, i);
7076 }
7077 
7078 void Assembler::fmulp(int i) {
7079   emit_farith(0xDE, 0xC8, i);
7080 }
7081 
7082 void Assembler::fnsave(Address dst) {
7083   InstructionMark im(this);
7084   emit_int8((unsigned char)0xDD);
7085   emit_operand32(rsi, dst);
7086 }
7087 
7088 void Assembler::fnstcw(Address src) {
7089   InstructionMark im(this);
7090   emit_int8((unsigned char)0x9B);
7091   emit_int8((unsigned char)0xD9);
7092   emit_operand32(rdi, src);
7093 }
7094 
7095 void Assembler::fnstsw_ax() {
7096   emit_int8((unsigned char)0xDF);
7097   emit_int8((unsigned char)0xE0);
7098 }
7099 
7100 void Assembler::fprem() {
7101   emit_int8((unsigned char)0xD9);
7102   emit_int8((unsigned char)0xF8);
7103 }
7104 
7105 void Assembler::fprem1() {
7106   emit_int8((unsigned char)0xD9);
7107   emit_int8((unsigned char)0xF5);
7108 }
7109 
7110 void Assembler::frstor(Address src) {
7111   InstructionMark im(this);
7112   emit_int8((unsigned char)0xDD);
7113   emit_operand32(rsp, src);
7114 }
7115 
7116 void Assembler::fsin() {
7117   emit_int8((unsigned char)0xD9);
7118   emit_int8((unsigned char)0xFE);
7119 }
7120 
7121 void Assembler::fsqrt() {
7122   emit_int8((unsigned char)0xD9);
7123   emit_int8((unsigned char)0xFA);
7124 }
7125 
7126 void Assembler::fst_d(Address adr) {
7127   InstructionMark im(this);
7128   emit_int8((unsigned char)0xDD);
7129   emit_operand32(rdx, adr);
7130 }
7131 
7132 void Assembler::fst_s(Address adr) {
7133   InstructionMark im(this);
7134   emit_int8((unsigned char)0xD9);
7135   emit_operand32(rdx, adr);
7136 }
7137 
7138 void Assembler::fstp_d(Address adr) {
7139   InstructionMark im(this);
7140   emit_int8((unsigned char)0xDD);
7141   emit_operand32(rbx, adr);
7142 }
7143 
7144 void Assembler::fstp_d(int index) {
7145   emit_farith(0xDD, 0xD8, index);
7146 }
7147 
7148 void Assembler::fstp_s(Address adr) {
7149   InstructionMark im(this);
7150   emit_int8((unsigned char)0xD9);
7151   emit_operand32(rbx, adr);
7152 }
7153 
7154 void Assembler::fstp_x(Address adr) {
7155   InstructionMark im(this);
7156   emit_int8((unsigned char)0xDB);
7157   emit_operand32(rdi, adr);
7158 }
7159 
7160 void Assembler::fsub(int i) {
7161   emit_farith(0xD8, 0xE0, i);
7162 }
7163 
7164 void Assembler::fsub_d(Address src) {
7165   InstructionMark im(this);
7166   emit_int8((unsigned char)0xDC);
7167   emit_operand32(rsp, src);
7168 }
7169 
7170 void Assembler::fsub_s(Address src) {
7171   InstructionMark im(this);
7172   emit_int8((unsigned char)0xD8);
7173   emit_operand32(rsp, src);
7174 }
7175 
7176 void Assembler::fsuba(int i) {
7177   emit_farith(0xDC, 0xE8, i);
7178 }
7179 
7180 void Assembler::fsubp(int i) {
7181   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
7182 }
7183 
7184 void Assembler::fsubr(int i) {
7185   emit_farith(0xD8, 0xE8, i);
7186 }
7187 
7188 void Assembler::fsubr_d(Address src) {
7189   InstructionMark im(this);
7190   emit_int8((unsigned char)0xDC);
7191   emit_operand32(rbp, src);
7192 }
7193 
7194 void Assembler::fsubr_s(Address src) {
7195   InstructionMark im(this);
7196   emit_int8((unsigned char)0xD8);
7197   emit_operand32(rbp, src);
7198 }
7199 
7200 void Assembler::fsubra(int i) {
7201   emit_farith(0xDC, 0xE0, i);
7202 }
7203 
7204 void Assembler::fsubrp(int i) {
7205   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
7206 }
7207 
7208 void Assembler::ftan() {
7209   emit_int8((unsigned char)0xD9);
7210   emit_int8((unsigned char)0xF2);
7211   emit_int8((unsigned char)0xDD);
7212   emit_int8((unsigned char)0xD8);
7213 }
7214 
7215 void Assembler::ftst() {
7216   emit_int8((unsigned char)0xD9);
7217   emit_int8((unsigned char)0xE4);
7218 }
7219 
7220 void Assembler::fucomi(int i) {
7221   // make sure the instruction is supported (introduced for P6, together with cmov)
7222   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7223   emit_farith(0xDB, 0xE8, i);
7224 }
7225 
7226 void Assembler::fucomip(int i) {
7227   // make sure the instruction is supported (introduced for P6, together with cmov)
7228   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7229   emit_farith(0xDF, 0xE8, i);
7230 }
7231 
7232 void Assembler::fwait() {
7233   emit_int8((unsigned char)0x9B);
7234 }
7235 
7236 void Assembler::fxch(int i) {
7237   emit_farith(0xD9, 0xC8, i);
7238 }
7239 
7240 void Assembler::fyl2x() {
7241   emit_int8((unsigned char)0xD9);
7242   emit_int8((unsigned char)0xF1);
7243 }
7244 
7245 void Assembler::frndint() {
7246   emit_int8((unsigned char)0xD9);
7247   emit_int8((unsigned char)0xFC);
7248 }
7249 
7250 void Assembler::f2xm1() {
7251   emit_int8((unsigned char)0xD9);
7252   emit_int8((unsigned char)0xF0);
7253 }
7254 
7255 void Assembler::fldl2e() {
7256   emit_int8((unsigned char)0xD9);
7257   emit_int8((unsigned char)0xEA);
7258 }
7259 
7260 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
7261 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
7262 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
7263 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
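
// Example (illustrative): an instruction documented as 66 0F 38 xx is passed
// in as (VEX_SIMD_66, VEX_OPCODE_0F_38); in legacy SSE mode the tables above
// make rex_prefix() emit 0x66, then 0x0F 0x38, ahead of the opcode byte.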
7264 
7265 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
7266 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7267   if (pre > 0) {
7268     emit_int8(simd_pre[pre]);
7269   }
7270   if (rex_w) {
7271     prefixq(adr, xreg);
7272   } else {
7273     prefix(adr, xreg);
7274   }
7275   if (opc > 0) {
7276     emit_int8(0x0F);
7277     int opc2 = simd_opc[opc];
7278     if (opc2 > 0) {
7279       emit_int8(opc2);
7280     }
7281   }
7282 }
7283 
7284 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7285   if (pre > 0) {
7286     emit_int8(simd_pre[pre]);
7287   }
7288   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
7289   if (opc > 0) {
7290     emit_int8(0x0F);
7291     int opc2 = simd_opc[opc];
7292     if (opc2 > 0) {
7293       emit_int8(opc2);
7294     }
7295   }
7296   return encode;
7297 }
7298 
7299 
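// VEX prefix layout for reference (R/X/B and vvvv are stored inverted):
//   2-byte: C5 [R vvvv L pp]
//   3-byte: C4 [R X B m-mmmm] [W vvvv L pp]
// The 2-byte form is only usable when X, B and W are clear and the opcode
// map is 0F, which is exactly the test below.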
7300 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
7301   int vector_len = _attributes->get_vector_len();
7302   bool vex_w = _attributes->is_rex_vex_w();
7303   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
7304     prefix(VEX_3bytes);
7305 
7306     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
7307     byte1 = (~byte1) & 0xE0;
7308     byte1 |= opc;
7309     emit_int8(byte1);
7310 
7311     int byte2 = ((~nds_enc) & 0xf) << 3;
7312     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
7313     emit_int8(byte2);
7314   } else {
7315     prefix(VEX_2bytes);
7316 
7317     int byte1 = vex_r ? VEX_R : 0;
7318     byte1 = (~byte1) & 0x80;
7319     byte1 |= ((~nds_enc) & 0xf) << 3;
7320     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
7321     emit_int8(byte1);
7322   }
7323 }
7324 
7325 // This is a 4 byte encoding
7326 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
7327   // EVEX 0x62 prefix
7328   prefix(EVEX_4bytes);
7329   bool vex_w = _attributes->is_rex_vex_w();
7330   int evex_encoding = (vex_w ? VEX_W : 0);
7331   // EVEX.b is not currently used for broadcast of single element or data rounding modes
7332   _attributes->set_evex_encoding(evex_encoding);
7333 
  // P0: byte 2, layout RXBR`00mm; the R, X, B and R' fields are computed
  // in plain form first and then inverted (not'd) below
7336   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
7337   byte2 = (~byte2) & 0xF0;
7338   // confine opc opcode extensions in mm bits to lower two bits
7339   // of form {0F, 0F_38, 0F_3A}
7340   byte2 |= opc;
7341   emit_int8(byte2);
7342 
7343   // P1: byte 3 as Wvvvv1pp
7344   int byte3 = ((~nds_enc) & 0xf) << 3;
7345   // p[10] is always 1
7346   byte3 |= EVEX_F;
7347   byte3 |= (vex_w & 1) << 7;
7348   // confine pre opcode extensions in pp bits to lower two bits
7349   // of form {66, F3, F2}
7350   byte3 |= pre;
7351   emit_int8(byte3);
7352 
7353   // P2: byte 4 as zL'Lbv'aaa
7354   // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
7355   int byte4 = (_attributes->is_no_reg_mask()) ?
7356               0 :
7357               _attributes->get_embedded_opmask_register_specifier();
7358   // EVEX.v` for extending EVEX.vvvv or VIDX
7359   byte4 |= (evex_v ? 0: EVEX_V);
  // EVEX.b for broadcast actions
7361   byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
7362   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
7363   byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
7364   // last is EVEX.z for zero/merge actions
7365   if (_attributes->is_no_reg_mask() == false) {
7366     byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
7367   }
7368   emit_int8(byte4);
7369 }
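
// For reference, the four EVEX bytes emitted above follow this layout (the
// R/X/B/R' and vvvv/V' fields are stored inverted):
//   62 [R X B R' 0 0 m m] [W v v v v 1 p p] [z L' L b V' a a a]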
7370 
7371 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7372   bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
7373   bool vex_b = adr.base_needs_rex();
7374   bool vex_x = adr.index_needs_rex();
7375   set_attributes(attributes);
7376   attributes->set_current_assembler(this);
7377 
7378   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7379   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7380     switch (attributes->get_vector_len()) {
7381     case AVX_128bit:
7382     case AVX_256bit:
7383       attributes->set_is_legacy_mode();
7384       break;
7385     }
7386   }
7387 
  // For a pure EVEX candidate, check whether this instruction is also legal
  // in legacy mode and whether its register resources fit there.  Pure EVEX
  // instructions call set_is_evex_instruction in their definition; otherwise
  // that field is set when we encode to EVEX
7392   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7393       !_is_managed && !attributes->is_evex_instruction()) {
7394     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7395       bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7396       if (check_register_bank) {
7397         // check nds_enc and xreg_enc for upper bank usage
7398         if (nds_enc < 16 && xreg_enc < 16) {
7399           attributes->set_is_legacy_mode();
7400         }
7401       } else {
7402         attributes->set_is_legacy_mode();
7403       }
7404     }
7405   }
7406 
7407   _is_managed = false;
7408   if (UseAVX > 2 && !attributes->is_legacy_mode())
7409   {
7410     bool evex_r = (xreg_enc >= 16);
7411     bool evex_v = (nds_enc >= 16);
7412     attributes->set_is_evex_instruction();
7413     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7414   } else {
7415     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7416       attributes->set_rex_vex_w(false);
7417     }
7418     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7419   }
7420 }
7421 
7422 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7423   bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
7424   bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
7425   bool vex_x = false;
7426   set_attributes(attributes);
7427   attributes->set_current_assembler(this);
7428   bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7429 
7430   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7431   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7432     switch (attributes->get_vector_len()) {
7433     case AVX_128bit:
7434     case AVX_256bit:
7435       if (check_register_bank) {
7436         if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
7437           // up propagate arithmetic instructions to meet RA requirements
7438           attributes->set_vector_len(AVX_512bit);
7439         } else {
7440           attributes->set_is_legacy_mode();
7441         }
7442       } else {
7443         attributes->set_is_legacy_mode();
7444       }
7445       break;
7446     }
7447   }
7448 
  // For a pure EVEX candidate, check whether this instruction is also legal
  // in legacy mode and whether its register resources fit there.  Pure EVEX
  // instructions call set_is_evex_instruction in their definition; otherwise
  // that field is set when we encode to EVEX
7453   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7454       !_is_managed && !attributes->is_evex_instruction()) {
7455     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7456       if (check_register_bank) {
7457         // check dst_enc, nds_enc and src_enc for upper bank usage
7458         if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
7459           attributes->set_is_legacy_mode();
7460         }
7461       } else {
7462         attributes->set_is_legacy_mode();
7463       }
7464     }
7465   }
7466 
7467   _is_managed = false;
7468   if (UseAVX > 2 && !attributes->is_legacy_mode())
7469   {
7470     bool evex_r = (dst_enc >= 16);
7471     bool evex_v = (nds_enc >= 16);
7472     // can use vex_x as bank extender on rm encoding
7473     vex_x = (src_enc >= 16);
7474     attributes->set_is_evex_instruction();
7475     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7476   } else {
7477     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7478       attributes->set_rex_vex_w(false);
7479     }
7480     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7481   }
7482 
7483   // return modrm byte components for operands
7484   return (((dst_enc & 7) << 3) | (src_enc & 7));
7485 }
7486 
7487 
7488 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
7489                             VexOpcode opc, InstructionAttr *attributes) {
7490   if (UseAVX > 0) {
7491     int xreg_enc = xreg->encoding();
7492     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7493     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
7494   } else {
7495     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
7496     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
7497   }
7498 }
7499 
7500 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
7501                                       VexOpcode opc, InstructionAttr *attributes) {
7502   int dst_enc = dst->encoding();
7503   int src_enc = src->encoding();
7504   if (UseAVX > 0) {
7505     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7506     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
7507   } else {
7508     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
7509     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
7510   }
7511 }
7512 
7513 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7514   assert(VM_Version::supports_avx(), "");
7515   assert(!VM_Version::supports_evex(), "");
7516   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7517   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7518   emit_int8((unsigned char)0xC2);
7519   emit_int8((unsigned char)(0xC0 | encode));
7520   emit_int8((unsigned char)(0xF & cop));
7521 }
7522 
7523 void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7524   assert(VM_Version::supports_avx(), "");
7525   assert(!VM_Version::supports_evex(), "");
7526   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7527   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7528   emit_int8((unsigned char)0x4B);
7529   emit_int8((unsigned char)(0xC0 | encode));
7530   int src2_enc = src2->encoding();
  emit_int8((unsigned char)(0xF0 & (src2_enc << 4)));
7532 }
7533 
7534 void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7535   assert(VM_Version::supports_avx(), "");
7536   assert(!VM_Version::supports_evex(), "");
7537   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7538   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
7539   emit_int8((unsigned char)0xC2);
7540   emit_int8((unsigned char)(0xC0 | encode));
7541   emit_int8((unsigned char)(0xF & cop));
7542 }
7543 
7544 void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7545   assert(VM_Version::supports_avx(), "");
7546   assert(!VM_Version::supports_evex(), "");
7547   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7548   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7549   emit_int8((unsigned char)0x4A);
7550   emit_int8((unsigned char)(0xC0 | encode));
7551   int src2_enc = src2->encoding();
  emit_int8((unsigned char)(0xF0 & (src2_enc << 4)));
7553 }
7554 
7555 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
7556   assert(VM_Version::supports_avx2(), "");
7557   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7558   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7559   emit_int8((unsigned char)0x02);
7560   emit_int8((unsigned char)(0xC0 | encode));
7561   emit_int8((unsigned char)imm8);
7562 }
7563 
7564 void Assembler::shlxl(Register dst, Register src1, Register src2) {
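  // VEX.NDS encoding: the shift count (src2) travels in VEX.vvvv while
  // src1 is the ModRM r/m operand, hence the swapped argument order below.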
7565   assert(VM_Version::supports_bmi2(), "");
7566   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7567   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7568   emit_int8((unsigned char)0xF7);
7569   emit_int8((unsigned char)(0xC0 | encode));
7570 }
7571 
7572 void Assembler::shlxq(Register dst, Register src1, Register src2) {
7573   assert(VM_Version::supports_bmi2(), "");
7574   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7575   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7576   emit_int8((unsigned char)0xF7);
7577   emit_int8((unsigned char)(0xC0 | encode));
7578 }
7579 
7580 #ifndef _LP64
7581 
7582 void Assembler::incl(Register dst) {
7583   // Don't use it directly. Use MacroAssembler::incrementl() instead.
7584   emit_int8(0x40 | dst->encoding());
7585 }
7586 
7587 void Assembler::lea(Register dst, Address src) {
7588   leal(dst, src);
7589 }
7590 
7591 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
7592   InstructionMark im(this);
7593   emit_int8((unsigned char)0xC7);
7594   emit_operand(rax, dst);
7595   emit_data((int)imm32, rspec, 0);
7596 }
7597 
7598 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7599   InstructionMark im(this);
7600   int encode = prefix_and_encode(dst->encoding());
7601   emit_int8((unsigned char)(0xB8 | encode));
7602   emit_data((int)imm32, rspec, 0);
7603 }
7604 
7605 void Assembler::popa() { // 32bit
7606   emit_int8(0x61);
7607 }
7608 
7609 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
7610   InstructionMark im(this);
7611   emit_int8(0x68);
7612   emit_data(imm32, rspec, 0);
7613 }
7614 
7615 void Assembler::pusha() { // 32bit
7616   emit_int8(0x60);
7617 }
7618 
7619 void Assembler::set_byte_if_not_zero(Register dst) {
7620   emit_int8(0x0F);
7621   emit_int8((unsigned char)0x95);
7622   emit_int8((unsigned char)(0xE0 | dst->encoding()));
7623 }
7624 
7625 void Assembler::shldl(Register dst, Register src) {
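  // 0F A5 /r: SHLD r/m32, r32, CL (shift count is implicit in CL)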
7626   emit_int8(0x0F);
7627   emit_int8((unsigned char)0xA5);
7628   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7629 }
7630 
// 0F A4 /r ib
7632 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
7633   emit_int8(0x0F);
7634   emit_int8((unsigned char)0xA4);
7635   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7636   emit_int8(imm8);
7637 }
7638 
7639 void Assembler::shrdl(Register dst, Register src) {
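  // 0F AD /r: SHRD r/m32, r32, CL (shift count is implicit in CL)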
7640   emit_int8(0x0F);
7641   emit_int8((unsigned char)0xAD);
7642   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7643 }
7644 
7645 #else // LP64
7646 
7647 void Assembler::set_byte_if_not_zero(Register dst) {
7648   int enc = prefix_and_encode(dst->encoding(), true);
7649   emit_int8(0x0F);
7650   emit_int8((unsigned char)0x95);
7651   emit_int8((unsigned char)(0xE0 | enc));
7652 }
7653 
// 64-bit only pieces of the assembler.
// These should only be used by 64-bit instructions that can use rip-relative
// addressing; they cannot be used by instructions that want an immediate
// value.
7657 
7658 bool Assembler::reachable(AddressLiteral adr) {
7659   int64_t disp;
  // A reloc of relocInfo::none will force a 64-bit literal to the code
  // stream. It is likely a placeholder for something that will be patched
  // later, so we need to be certain it will always be reachable.
7663   if (adr.reloc() == relocInfo::none) {
7664     return false;
7665   }
7666   if (adr.reloc() == relocInfo::internal_word_type) {
7667     // This should be rip relative and easily reachable.
7668     return true;
7669   }
7670   if (adr.reloc() == relocInfo::virtual_call_type ||
7671       adr.reloc() == relocInfo::opt_virtual_call_type ||
7672       adr.reloc() == relocInfo::static_call_type ||
7673       adr.reloc() == relocInfo::static_stub_type ) {
7674     // This should be rip relative within the code cache and easily
7675     // reachable until we get huge code caches. (At which point
7676     // ic code is going to have issues).
7677     return true;
7678   }
7679   if (adr.reloc() != relocInfo::external_word_type &&
7680       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
7681       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
7682       adr.reloc() != relocInfo::runtime_call_type ) {
7683     return false;
7684   }
7685 
7686   // Stress the correction code
7687   if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if it is in the codecache.
7689     // Flipping stuff in the codecache to be unreachable causes issues
7690     // with things like inline caches where the additional instructions
7691     // are not handled.
7692     if (CodeCache::find_blob(adr._target) == NULL) {
7693       return false;
7694     }
7695   }
  // For external_word_type/runtime_call_type, if the target is reachable both
  // from where we are now (possibly a temp buffer) and from anywhere we might
  // end up in the codeCache, then we are always reachable.
  // This would have to change to be more pessimistic if we ever save/restore
  // shared code.
7701   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
7702   if (!is_simm32(disp)) return false;
7703   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
7704   if (!is_simm32(disp)) return false;
7705 
7706   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
7707 
  // Because a rip-relative target is disp + address_of_next_instruction, and
  // we don't know address_of_next_instruction yet, we apply a fudge factor
  // to make sure we will be ok no matter what size of instruction this
  // displacement gets placed into.
  // We don't have to fudge the checks above here because they are already
  // worst case.
7712 
  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, a 4-byte literal
  // + 4 because better safe than sorry.
7715   const int fudge = 12 + 4;
7716   if (disp < 0) {
7717     disp -= fudge;
7718   } else {
7719     disp += fudge;
7720   }
7721   return is_simm32(disp);
7722 }
7723 
7724 // Check if the polling page is not reachable from the code cache using rip-relative
7725 // addressing.
7726 bool Assembler::is_polling_page_far() {
7727   intptr_t addr = (intptr_t)os::get_polling_page();
7728   return ForceUnreachable ||
7729          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
7730          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
7731 }
7732 
7733 void Assembler::emit_data64(jlong data,
7734                             relocInfo::relocType rtype,
7735                             int format) {
7736   if (rtype == relocInfo::none) {
7737     emit_int64(data);
7738   } else {
7739     emit_data64(data, Relocation::spec_simple(rtype), format);
7740   }
7741 }
7742 
7743 void Assembler::emit_data64(jlong data,
7744                             RelocationHolder const& rspec,
7745                             int format) {
7746   assert(imm_operand == 0, "default format must be immediate in this file");
7747   assert(imm_operand == format, "must be immediate");
7748   assert(inst_mark() != NULL, "must be inside InstructionMark");
7749   // Do not use AbstractAssembler::relocate, which is not intended for
7750   // embedded words.  Instead, relocate to the enclosing instruction.
7751   code_section()->relocate(inst_mark(), rspec, format);
7752 #ifdef ASSERT
7753   check_relocation(rspec, format);
7754 #endif
7755   emit_int64(data);
7756 }
7757 
7758 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
7759   if (reg_enc >= 8) {
7760     prefix(REX_B);
7761     reg_enc -= 8;
7762   } else if (byteinst && reg_enc >= 4) {
7763     prefix(REX);
7764   }
7765   return reg_enc;
7766 }
7767 
7768 int Assembler::prefixq_and_encode(int reg_enc) {
7769   if (reg_enc < 8) {
7770     prefix(REX_W);
7771   } else {
7772     prefix(REX_WB);
7773     reg_enc -= 8;
7774   }
7775   return reg_enc;
7776 }
7777 
7778 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
7779   if (dst_enc < 8) {
7780     if (src_enc >= 8) {
7781       prefix(REX_B);
7782       src_enc -= 8;
7783     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
7784       prefix(REX);
7785     }
7786   } else {
7787     if (src_enc < 8) {
7788       prefix(REX_R);
7789     } else {
7790       prefix(REX_RB);
7791       src_enc -= 8;
7792     }
7793     dst_enc -= 8;
7794   }
7795   return dst_enc << 3 | src_enc;
7796 }
7797 
7798 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
7799   if (dst_enc < 8) {
7800     if (src_enc < 8) {
7801       prefix(REX_W);
7802     } else {
7803       prefix(REX_WB);
7804       src_enc -= 8;
7805     }
7806   } else {
7807     if (src_enc < 8) {
7808       prefix(REX_WR);
7809     } else {
7810       prefix(REX_WRB);
7811       src_enc -= 8;
7812     }
7813     dst_enc -= 8;
7814   }
7815   return dst_enc << 3 | src_enc;
7816 }
7817 
7818 void Assembler::prefix(Register reg) {
7819   if (reg->encoding() >= 8) {
7820     prefix(REX_B);
7821   }
7822 }
7823 
7824 void Assembler::prefix(Register dst, Register src, Prefix p) {
7825   if (src->encoding() >= 8) {
7826     p = (Prefix)(p | REX_B);
7827   }
7828   if (dst->encoding() >= 8) {
7829     p = (Prefix)( p | REX_R);
7830   }
7831   if (p != Prefix_EMPTY) {
7832     // do not generate an empty prefix
7833     prefix(p);
7834   }
7835 }
7836 
7837 void Assembler::prefix(Register dst, Address adr, Prefix p) {
7838   if (adr.base_needs_rex()) {
7839     if (adr.index_needs_rex()) {
7840       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7841     } else {
7842       prefix(REX_B);
7843     }
7844   } else {
7845     if (adr.index_needs_rex()) {
7846       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7847     }
7848   }
7849   if (dst->encoding() >= 8) {
7850     p = (Prefix)(p | REX_R);
7851   }
7852   if (p != Prefix_EMPTY) {
7853     // do not generate an empty prefix
7854     prefix(p);
7855   }
7856 }
7857 
7858 void Assembler::prefix(Address adr) {
7859   if (adr.base_needs_rex()) {
7860     if (adr.index_needs_rex()) {
7861       prefix(REX_XB);
7862     } else {
7863       prefix(REX_B);
7864     }
7865   } else {
7866     if (adr.index_needs_rex()) {
7867       prefix(REX_X);
7868     }
7869   }
7870 }
7871 
7872 void Assembler::prefixq(Address adr) {
7873   if (adr.base_needs_rex()) {
7874     if (adr.index_needs_rex()) {
7875       prefix(REX_WXB);
7876     } else {
7877       prefix(REX_WB);
7878     }
7879   } else {
7880     if (adr.index_needs_rex()) {
7881       prefix(REX_WX);
7882     } else {
7883       prefix(REX_W);
7884     }
7885   }
7886 }
7887 
7888 
7889 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
7890   if (reg->encoding() < 8) {
7891     if (adr.base_needs_rex()) {
7892       if (adr.index_needs_rex()) {
7893         prefix(REX_XB);
7894       } else {
7895         prefix(REX_B);
7896       }
7897     } else {
7898       if (adr.index_needs_rex()) {
7899         prefix(REX_X);
7900       } else if (byteinst && reg->encoding() >= 4 ) {
7901         prefix(REX);
7902       }
7903     }
7904   } else {
7905     if (adr.base_needs_rex()) {
7906       if (adr.index_needs_rex()) {
7907         prefix(REX_RXB);
7908       } else {
7909         prefix(REX_RB);
7910       }
7911     } else {
7912       if (adr.index_needs_rex()) {
7913         prefix(REX_RX);
7914       } else {
7915         prefix(REX_R);
7916       }
7917     }
7918   }
7919 }
7920 
7921 void Assembler::prefixq(Address adr, Register src) {
7922   if (src->encoding() < 8) {
7923     if (adr.base_needs_rex()) {
7924       if (adr.index_needs_rex()) {
7925         prefix(REX_WXB);
7926       } else {
7927         prefix(REX_WB);
7928       }
7929     } else {
7930       if (adr.index_needs_rex()) {
7931         prefix(REX_WX);
7932       } else {
7933         prefix(REX_W);
7934       }
7935     }
7936   } else {
7937     if (adr.base_needs_rex()) {
7938       if (adr.index_needs_rex()) {
7939         prefix(REX_WRXB);
7940       } else {
7941         prefix(REX_WRB);
7942       }
7943     } else {
7944       if (adr.index_needs_rex()) {
7945         prefix(REX_WRX);
7946       } else {
7947         prefix(REX_WR);
7948       }
7949     }
7950   }
7951 }
7952 
7953 void Assembler::prefix(Address adr, XMMRegister reg) {
7954   if (reg->encoding() < 8) {
7955     if (adr.base_needs_rex()) {
7956       if (adr.index_needs_rex()) {
7957         prefix(REX_XB);
7958       } else {
7959         prefix(REX_B);
7960       }
7961     } else {
7962       if (adr.index_needs_rex()) {
7963         prefix(REX_X);
7964       }
7965     }
7966   } else {
7967     if (adr.base_needs_rex()) {
7968       if (adr.index_needs_rex()) {
7969         prefix(REX_RXB);
7970       } else {
7971         prefix(REX_RB);
7972       }
7973     } else {
7974       if (adr.index_needs_rex()) {
7975         prefix(REX_RX);
7976       } else {
7977         prefix(REX_R);
7978       }
7979     }
7980   }
7981 }
7982 
7983 void Assembler::prefixq(Address adr, XMMRegister src) {
7984   if (src->encoding() < 8) {
7985     if (adr.base_needs_rex()) {
7986       if (adr.index_needs_rex()) {
7987         prefix(REX_WXB);
7988       } else {
7989         prefix(REX_WB);
7990       }
7991     } else {
7992       if (adr.index_needs_rex()) {
7993         prefix(REX_WX);
7994       } else {
7995         prefix(REX_W);
7996       }
7997     }
7998   } else {
7999     if (adr.base_needs_rex()) {
8000       if (adr.index_needs_rex()) {
8001         prefix(REX_WRXB);
8002       } else {
8003         prefix(REX_WRB);
8004       }
8005     } else {
8006       if (adr.index_needs_rex()) {
8007         prefix(REX_WRX);
8008       } else {
8009         prefix(REX_WR);
8010       }
8011     }
8012   }
8013 }
8014 
8015 void Assembler::adcq(Register dst, int32_t imm32) {
8016   (void) prefixq_and_encode(dst->encoding());
8017   emit_arith(0x81, 0xD0, dst, imm32);
8018 }
8019 
8020 void Assembler::adcq(Register dst, Address src) {
8021   InstructionMark im(this);
8022   prefixq(src, dst);
8023   emit_int8(0x13);
8024   emit_operand(dst, src);
8025 }
8026 
8027 void Assembler::adcq(Register dst, Register src) {
8028   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8029   emit_arith(0x13, 0xC0, dst, src);
8030 }
8031 
8032 void Assembler::addq(Address dst, int32_t imm32) {
8033   InstructionMark im(this);
8034   prefixq(dst);
8035   emit_arith_operand(0x81, rax, dst,imm32);
8036 }
8037 
8038 void Assembler::addq(Address dst, Register src) {
8039   InstructionMark im(this);
8040   prefixq(dst, src);
8041   emit_int8(0x01);
8042   emit_operand(src, dst);
8043 }
8044 
8045 void Assembler::addq(Register dst, int32_t imm32) {
8046   (void) prefixq_and_encode(dst->encoding());
8047   emit_arith(0x81, 0xC0, dst, imm32);
8048 }
8049 
8050 void Assembler::addq(Register dst, Address src) {
8051   InstructionMark im(this);
8052   prefixq(src, dst);
8053   emit_int8(0x03);
8054   emit_operand(dst, src);
8055 }
8056 
8057 void Assembler::addq(Register dst, Register src) {
8058   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8059   emit_arith(0x03, 0xC0, dst, src);
8060 }
8061 
8062 void Assembler::adcxq(Register dst, Register src) {
8063   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8064   emit_int8((unsigned char)0x66);
8065   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8066   emit_int8(0x0F);
8067   emit_int8(0x38);
8068   emit_int8((unsigned char)0xF6);
8069   emit_int8((unsigned char)(0xC0 | encode));
8070 }
8071 
8072 void Assembler::adoxq(Register dst, Register src) {
8073   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8074   emit_int8((unsigned char)0xF3);
8075   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8076   emit_int8(0x0F);
8077   emit_int8(0x38);
8078   emit_int8((unsigned char)0xF6);
8079   emit_int8((unsigned char)(0xC0 | encode));
8080 }
8081 
8082 void Assembler::andq(Address dst, int32_t imm32) {
8083   InstructionMark im(this);
8084   prefixq(dst);
8085   emit_int8((unsigned char)0x81);
8086   emit_operand(rsp, dst, 4);
8087   emit_int32(imm32);
8088 }
8089 
8090 void Assembler::andq(Register dst, int32_t imm32) {
8091   (void) prefixq_and_encode(dst->encoding());
8092   emit_arith(0x81, 0xE0, dst, imm32);
8093 }
8094 
8095 void Assembler::andq(Register dst, Address src) {
8096   InstructionMark im(this);
8097   prefixq(src, dst);
8098   emit_int8(0x23);
8099   emit_operand(dst, src);
8100 }
8101 
8102 void Assembler::andq(Register dst, Register src) {
8103   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8104   emit_arith(0x23, 0xC0, dst, src);
8105 }
8106 
8107 void Assembler::andnq(Register dst, Register src1, Register src2) {
8108   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8109   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8110   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8111   emit_int8((unsigned char)0xF2);
8112   emit_int8((unsigned char)(0xC0 | encode));
8113 }
8114 
8115 void Assembler::andnq(Register dst, Register src1, Address src2) {
8116   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8117   InstructionMark im(this);
8118   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8119   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8120   emit_int8((unsigned char)0xF2);
8121   emit_operand(dst, src2);
8122 }
8123 
8124 void Assembler::bsfq(Register dst, Register src) {
8125   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8126   emit_int8(0x0F);
8127   emit_int8((unsigned char)0xBC);
8128   emit_int8((unsigned char)(0xC0 | encode));
8129 }
8130 
8131 void Assembler::bsrq(Register dst, Register src) {
8132   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8133   emit_int8(0x0F);
8134   emit_int8((unsigned char)0xBD);
8135   emit_int8((unsigned char)(0xC0 | encode));
8136 }
8137 
8138 void Assembler::bswapq(Register reg) {
8139   int encode = prefixq_and_encode(reg->encoding());
8140   emit_int8(0x0F);
8141   emit_int8((unsigned char)(0xC8 | encode));
8142 }
8143 
8144 void Assembler::blsiq(Register dst, Register src) {
8145   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8146   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8147   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8148   emit_int8((unsigned char)0xF3);
8149   emit_int8((unsigned char)(0xC0 | encode));
8150 }
8151 
8152 void Assembler::blsiq(Register dst, Address src) {
8153   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8154   InstructionMark im(this);
8155   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8156   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8157   emit_int8((unsigned char)0xF3);
8158   emit_operand(rbx, src);
8159 }
8160 
8161 void Assembler::blsmskq(Register dst, Register src) {
8162   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8163   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8164   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8165   emit_int8((unsigned char)0xF3);
8166   emit_int8((unsigned char)(0xC0 | encode));
8167 }
8168 
8169 void Assembler::blsmskq(Register dst, Address src) {
8170   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8171   InstructionMark im(this);
8172   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8173   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8174   emit_int8((unsigned char)0xF3);
8175   emit_operand(rdx, src);
8176 }
8177 
8178 void Assembler::blsrq(Register dst, Register src) {
8179   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8180   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8181   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8182   emit_int8((unsigned char)0xF3);
8183   emit_int8((unsigned char)(0xC0 | encode));
8184 }
8185 
8186 void Assembler::blsrq(Register dst, Address src) {
8187   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8188   InstructionMark im(this);
8189   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8190   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8191   emit_int8((unsigned char)0xF3);
8192   emit_operand(rcx, src);
8193 }
8194 
8195 void Assembler::cdqq() {
8196   prefix(REX_W);
8197   emit_int8((unsigned char)0x99);
8198 }
8199 
8200 void Assembler::clflush(Address adr) {
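  // 0F AE /7: CLFLUSH m8; the /7 digit rides in the reg field via rdi in
  // emit_operand() below (fxsave/fxrstor/xsave/xrstor later in this file use
  // the same 0F AE group with digits /0, /1, /4 and /5).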
8201   prefix(adr);
8202   emit_int8(0x0F);
8203   emit_int8((unsigned char)0xAE);
8204   emit_operand(rdi, adr);
8205 }
8206 
8207 void Assembler::cmovq(Condition cc, Register dst, Register src) {
8208   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8209   emit_int8(0x0F);
8210   emit_int8(0x40 | cc);
8211   emit_int8((unsigned char)(0xC0 | encode));
8212 }
8213 
8214 void Assembler::cmovq(Condition cc, Register dst, Address src) {
8215   InstructionMark im(this);
8216   prefixq(src, dst);
8217   emit_int8(0x0F);
8218   emit_int8(0x40 | cc);
8219   emit_operand(dst, src);
8220 }
8221 
8222 void Assembler::cmpq(Address dst, int32_t imm32) {
8223   InstructionMark im(this);
8224   prefixq(dst);
8225   emit_int8((unsigned char)0x81);
8226   emit_operand(rdi, dst, 4);
8227   emit_int32(imm32);
8228 }
8229 
8230 void Assembler::cmpq(Register dst, int32_t imm32) {
8231   (void) prefixq_and_encode(dst->encoding());
8232   emit_arith(0x81, 0xF8, dst, imm32);
8233 }
8234 
8235 void Assembler::cmpq(Address dst, Register src) {
8236   InstructionMark im(this);
8237   prefixq(dst, src);
  emit_int8(0x39); // CMP r/m64, r64: flags reflect [dst] - src (0x3B would compute src - [dst])
8239   emit_operand(src, dst);
8240 }
8241 
8242 void Assembler::cmpq(Register dst, Register src) {
8243   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8244   emit_arith(0x3B, 0xC0, dst, src);
8245 }
8246 
8247 void Assembler::cmpq(Register dst, Address  src) {
8248   InstructionMark im(this);
8249   prefixq(src, dst);
8250   emit_int8(0x3B);
8251   emit_operand(dst, src);
8252 }
8253 
8254 void Assembler::cmpxchgq(Register reg, Address adr) {
8255   InstructionMark im(this);
8256   prefixq(adr, reg);
8257   emit_int8(0x0F);
8258   emit_int8((unsigned char)0xB1);
8259   emit_operand(reg, adr);
8260 }
8261 
8262 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
8263   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8264   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8265   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8266   emit_int8(0x2A);
8267   emit_int8((unsigned char)(0xC0 | encode));
8268 }
8269 
8270 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
8271   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8272   InstructionMark im(this);
8273   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8274   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8275   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8276   emit_int8(0x2A);
8277   emit_operand(dst, src);
8278 }
8279 
8280 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
8281   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8282   InstructionMark im(this);
8283   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8284   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8285   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8286   emit_int8(0x2A);
8287   emit_operand(dst, src);
8288 }
8289 
8290 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
8291   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8292   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8293   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8294   emit_int8(0x2C);
8295   emit_int8((unsigned char)(0xC0 | encode));
8296 }
8297 
8298 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
8299   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8300   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8301   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8302   emit_int8(0x2C);
8303   emit_int8((unsigned char)(0xC0 | encode));
8304 }
8305 
8306 void Assembler::decl(Register dst) {
8307   // Don't use it directly. Use MacroAssembler::decrementl() instead.
8308   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8309   int encode = prefix_and_encode(dst->encoding());
8310   emit_int8((unsigned char)0xFF);
8311   emit_int8((unsigned char)(0xC8 | encode));
8312 }
8313 
8314 void Assembler::decq(Register dst) {
8315   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8317   int encode = prefixq_and_encode(dst->encoding());
8318   emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
8320 }
8321 
8322 void Assembler::decq(Address dst) {
8323   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8324   InstructionMark im(this);
8325   prefixq(dst);
8326   emit_int8((unsigned char)0xFF);
8327   emit_operand(rcx, dst);
8328 }
8329 
8330 void Assembler::fxrstor(Address src) {
8331   prefixq(src);
8332   emit_int8(0x0F);
8333   emit_int8((unsigned char)0xAE);
8334   emit_operand(as_Register(1), src);
8335 }
8336 
8337 void Assembler::xrstor(Address src) {
8338   prefixq(src);
8339   emit_int8(0x0F);
8340   emit_int8((unsigned char)0xAE);
8341   emit_operand(as_Register(5), src);
8342 }
8343 
8344 void Assembler::fxsave(Address dst) {
8345   prefixq(dst);
8346   emit_int8(0x0F);
8347   emit_int8((unsigned char)0xAE);
8348   emit_operand(as_Register(0), dst);
8349 }
8350 
8351 void Assembler::xsave(Address dst) {
8352   prefixq(dst);
8353   emit_int8(0x0F);
8354   emit_int8((unsigned char)0xAE);
8355   emit_operand(as_Register(4), dst);
8356 }
8357 
8358 void Assembler::idivq(Register src) {
8359   int encode = prefixq_and_encode(src->encoding());
8360   emit_int8((unsigned char)0xF7);
8361   emit_int8((unsigned char)(0xF8 | encode));
8362 }
8363 
8364 void Assembler::imulq(Register dst, Register src) {
8365   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8366   emit_int8(0x0F);
8367   emit_int8((unsigned char)0xAF);
8368   emit_int8((unsigned char)(0xC0 | encode));
8369 }
8370 
8371 void Assembler::imulq(Register dst, Register src, int value) {
8372   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8373   if (is8bit(value)) {
8374     emit_int8(0x6B);
8375     emit_int8((unsigned char)(0xC0 | encode));
8376     emit_int8(value & 0xFF);
8377   } else {
8378     emit_int8(0x69);
8379     emit_int8((unsigned char)(0xC0 | encode));
8380     emit_int32(value);
8381   }
8382 }
8383 
8384 void Assembler::imulq(Register dst, Address src) {
8385   InstructionMark im(this);
8386   prefixq(src, dst);
8387   emit_int8(0x0F);
8388   emit_int8((unsigned char) 0xAF);
8389   emit_operand(dst, src);
8390 }
8391 
8392 void Assembler::incl(Register dst) {
8393   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8395   int encode = prefix_and_encode(dst->encoding());
8396   emit_int8((unsigned char)0xFF);
8397   emit_int8((unsigned char)(0xC0 | encode));
8398 }
8399 
8400 void Assembler::incq(Register dst) {
8401   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8403   int encode = prefixq_and_encode(dst->encoding());
8404   emit_int8((unsigned char)0xFF);
8405   emit_int8((unsigned char)(0xC0 | encode));
8406 }
8407 
8408 void Assembler::incq(Address dst) {
8409   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8410   InstructionMark im(this);
8411   prefixq(dst);
8412   emit_int8((unsigned char)0xFF);
8413   emit_operand(rax, dst);
8414 }
8415 
8416 void Assembler::lea(Register dst, Address src) {
8417   leaq(dst, src);
8418 }
8419 
8420 void Assembler::leaq(Register dst, Address src) {
8421   InstructionMark im(this);
8422   prefixq(src, dst);
8423   emit_int8((unsigned char)0x8D);
8424   emit_operand(dst, src);
8425 }
8426 
8427 void Assembler::mov64(Register dst, int64_t imm64) {
8428   InstructionMark im(this);
8429   int encode = prefixq_and_encode(dst->encoding());
8430   emit_int8((unsigned char)(0xB8 | encode));
8431   emit_int64(imm64);
8432 }
8433 
8434 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
8435   InstructionMark im(this);
8436   int encode = prefixq_and_encode(dst->encoding());
8437   emit_int8(0xB8 | encode);
8438   emit_data64(imm64, rspec);
8439 }
8440 
8441 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
8442   InstructionMark im(this);
8443   int encode = prefix_and_encode(dst->encoding());
8444   emit_int8((unsigned char)(0xB8 | encode));
8445   emit_data((int)imm32, rspec, narrow_oop_operand);
8446 }
8447 
8448 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
8449   InstructionMark im(this);
8450   prefix(dst);
8451   emit_int8((unsigned char)0xC7);
8452   emit_operand(rax, dst, 4);
8453   emit_data((int)imm32, rspec, narrow_oop_operand);
8454 }
8455 
8456 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
8457   InstructionMark im(this);
8458   int encode = prefix_and_encode(src1->encoding());
8459   emit_int8((unsigned char)0x81);
8460   emit_int8((unsigned char)(0xF8 | encode));
8461   emit_data((int)imm32, rspec, narrow_oop_operand);
8462 }
8463 
8464 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
8465   InstructionMark im(this);
8466   prefix(src1);
8467   emit_int8((unsigned char)0x81);
8468   emit_operand(rax, src1, 4);
8469   emit_data((int)imm32, rspec, narrow_oop_operand);
8470 }
8471 
8472 void Assembler::lzcntq(Register dst, Register src) {
8473   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
8474   emit_int8((unsigned char)0xF3);
8475   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8476   emit_int8(0x0F);
8477   emit_int8((unsigned char)0xBD);
8478   emit_int8((unsigned char)(0xC0 | encode));
8479 }
8480 
8481 void Assembler::movdq(XMMRegister dst, Register src) {
8482   // table D-1 says MMX/SSE2
8483   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8484   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8485   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8486   emit_int8(0x6E);
8487   emit_int8((unsigned char)(0xC0 | encode));
8488 }
8489 
8490 void Assembler::movdq(Register dst, XMMRegister src) {
8491   // table D-1 says MMX/SSE2
8492   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8493   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8494   // swap src/dst to get correct prefix
8495   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8496   emit_int8(0x7E);
8497   emit_int8((unsigned char)(0xC0 | encode));
8498 }
8499 
8500 void Assembler::movq(Register dst, Register src) {
8501   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8502   emit_int8((unsigned char)0x8B);
8503   emit_int8((unsigned char)(0xC0 | encode));
8504 }
8505 
8506 void Assembler::movq(Register dst, Address src) {
8507   InstructionMark im(this);
8508   prefixq(src, dst);
8509   emit_int8((unsigned char)0x8B);
8510   emit_operand(dst, src);
8511 }
8512 
8513 void Assembler::movq(Address dst, Register src) {
8514   InstructionMark im(this);
8515   prefixq(dst, src);
8516   emit_int8((unsigned char)0x89);
8517   emit_operand(src, dst);
8518 }
8519 
8520 void Assembler::movsbq(Register dst, Address src) {
8521   InstructionMark im(this);
8522   prefixq(src, dst);
8523   emit_int8(0x0F);
8524   emit_int8((unsigned char)0xBE);
8525   emit_operand(dst, src);
8526 }
8527 
8528 void Assembler::movsbq(Register dst, Register src) {
8529   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8530   emit_int8(0x0F);
8531   emit_int8((unsigned char)0xBE);
8532   emit_int8((unsigned char)(0xC0 | encode));
8533 }
8534 
8535 void Assembler::movslq(Register dst, int32_t imm32) {
8536   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
8537   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use it until it has been tested at runtime...
8539   ShouldNotReachHere();
8540   InstructionMark im(this);
8541   int encode = prefixq_and_encode(dst->encoding());
8542   emit_int8((unsigned char)(0xC7 | encode));
8543   emit_int32(imm32);
8544 }
8545 
8546 void Assembler::movslq(Address dst, int32_t imm32) {
8547   assert(is_simm32(imm32), "lost bits");
8548   InstructionMark im(this);
8549   prefixq(dst);
8550   emit_int8((unsigned char)0xC7);
8551   emit_operand(rax, dst, 4);
8552   emit_int32(imm32);
8553 }
8554 
8555 void Assembler::movslq(Register dst, Address src) {
8556   InstructionMark im(this);
8557   prefixq(src, dst);
8558   emit_int8(0x63);
8559   emit_operand(dst, src);
8560 }
8561 
8562 void Assembler::movslq(Register dst, Register src) {
8563   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8564   emit_int8(0x63);
8565   emit_int8((unsigned char)(0xC0 | encode));
8566 }
8567 
8568 void Assembler::movswq(Register dst, Address src) {
8569   InstructionMark im(this);
8570   prefixq(src, dst);
8571   emit_int8(0x0F);
8572   emit_int8((unsigned char)0xBF);
8573   emit_operand(dst, src);
8574 }
8575 
8576 void Assembler::movswq(Register dst, Register src) {
8577   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8578   emit_int8((unsigned char)0x0F);
8579   emit_int8((unsigned char)0xBF);
8580   emit_int8((unsigned char)(0xC0 | encode));
8581 }
8582 
8583 void Assembler::movzbq(Register dst, Address src) {
8584   InstructionMark im(this);
8585   prefixq(src, dst);
8586   emit_int8((unsigned char)0x0F);
8587   emit_int8((unsigned char)0xB6);
8588   emit_operand(dst, src);
8589 }
8590 
8591 void Assembler::movzbq(Register dst, Register src) {
8592   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8593   emit_int8(0x0F);
8594   emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
8596 }
8597 
8598 void Assembler::movzwq(Register dst, Address src) {
8599   InstructionMark im(this);
8600   prefixq(src, dst);
8601   emit_int8((unsigned char)0x0F);
8602   emit_int8((unsigned char)0xB7);
8603   emit_operand(dst, src);
8604 }
8605 
8606 void Assembler::movzwq(Register dst, Register src) {
8607   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8608   emit_int8((unsigned char)0x0F);
8609   emit_int8((unsigned char)0xB7);
8610   emit_int8((unsigned char)(0xC0 | encode));
8611 }
8612 
8613 void Assembler::mulq(Address src) {
8614   InstructionMark im(this);
8615   prefixq(src);
8616   emit_int8((unsigned char)0xF7);
8617   emit_operand(rsp, src);
8618 }
8619 
8620 void Assembler::mulq(Register src) {
8621   int encode = prefixq_and_encode(src->encoding());
8622   emit_int8((unsigned char)0xF7);
8623   emit_int8((unsigned char)(0xE0 | encode));
8624 }
8625 
8626 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
8627   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8628   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8629   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
8630   emit_int8((unsigned char)0xF6);
8631   emit_int8((unsigned char)(0xC0 | encode));
8632 }
8633 
8634 void Assembler::negq(Register dst) {
8635   int encode = prefixq_and_encode(dst->encoding());
8636   emit_int8((unsigned char)0xF7);
8637   emit_int8((unsigned char)(0xD8 | encode));
8638 }
8639 
8640 void Assembler::notq(Register dst) {
8641   int encode = prefixq_and_encode(dst->encoding());
8642   emit_int8((unsigned char)0xF7);
8643   emit_int8((unsigned char)(0xD0 | encode));
8644 }
8645 
8646 void Assembler::orq(Address dst, int32_t imm32) {
8647   InstructionMark im(this);
8648   prefixq(dst);
8649   emit_int8((unsigned char)0x81);
8650   emit_operand(rcx, dst, 4);
8651   emit_int32(imm32);
8652 }
8653 
8654 void Assembler::orq(Register dst, int32_t imm32) {
8655   (void) prefixq_and_encode(dst->encoding());
8656   emit_arith(0x81, 0xC8, dst, imm32);
8657 }
8658 
8659 void Assembler::orq(Register dst, Address src) {
8660   InstructionMark im(this);
8661   prefixq(src, dst);
8662   emit_int8(0x0B);
8663   emit_operand(dst, src);
8664 }
8665 
8666 void Assembler::orq(Register dst, Register src) {
8667   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8668   emit_arith(0x0B, 0xC0, dst, src);
8669 }
8670 
8671 void Assembler::popa() { // 64bit
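  // Restores the registers saved by pusha() below, in reverse; slot 11, which
  // holds the original rsp, is deliberately skipped.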
8672   movq(r15, Address(rsp, 0));
8673   movq(r14, Address(rsp, wordSize));
8674   movq(r13, Address(rsp, 2 * wordSize));
8675   movq(r12, Address(rsp, 3 * wordSize));
8676   movq(r11, Address(rsp, 4 * wordSize));
8677   movq(r10, Address(rsp, 5 * wordSize));
8678   movq(r9,  Address(rsp, 6 * wordSize));
8679   movq(r8,  Address(rsp, 7 * wordSize));
8680   movq(rdi, Address(rsp, 8 * wordSize));
8681   movq(rsi, Address(rsp, 9 * wordSize));
8682   movq(rbp, Address(rsp, 10 * wordSize));
8683   // skip rsp
8684   movq(rbx, Address(rsp, 12 * wordSize));
8685   movq(rdx, Address(rsp, 13 * wordSize));
8686   movq(rcx, Address(rsp, 14 * wordSize));
8687   movq(rax, Address(rsp, 15 * wordSize));
8688 
8689   addq(rsp, 16 * wordSize);
8690 }
8691 
8692 void Assembler::popcntq(Register dst, Address src) {
8693   assert(VM_Version::supports_popcnt(), "must support");
8694   InstructionMark im(this);
8695   emit_int8((unsigned char)0xF3);
8696   prefixq(src, dst);
8697   emit_int8((unsigned char)0x0F);
8698   emit_int8((unsigned char)0xB8);
8699   emit_operand(dst, src);
8700 }
8701 
8702 void Assembler::popcntq(Register dst, Register src) {
8703   assert(VM_Version::supports_popcnt(), "must support");
8704   emit_int8((unsigned char)0xF3);
8705   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8706   emit_int8((unsigned char)0x0F);
8707   emit_int8((unsigned char)0xB8);
8708   emit_int8((unsigned char)(0xC0 | encode));
8709 }
8710 
8711 void Assembler::popq(Address dst) {
8712   InstructionMark im(this);
8713   prefixq(dst);
8714   emit_int8((unsigned char)0x8F);
8715   emit_operand(rax, dst);
8716 }
8717 
8718 void Assembler::pusha() { // 64bit
  // We have to store the original rsp.  The ABI says that the 128 bytes
  // below rsp (the red zone) are local scratch.
  movq(Address(rsp, -5 * wordSize), rsp); // lands in slot 11 after the subq below
8722 
8723   subq(rsp, 16 * wordSize);
8724 
8725   movq(Address(rsp, 15 * wordSize), rax);
8726   movq(Address(rsp, 14 * wordSize), rcx);
8727   movq(Address(rsp, 13 * wordSize), rdx);
8728   movq(Address(rsp, 12 * wordSize), rbx);
8729   // skip rsp
8730   movq(Address(rsp, 10 * wordSize), rbp);
8731   movq(Address(rsp, 9 * wordSize), rsi);
8732   movq(Address(rsp, 8 * wordSize), rdi);
8733   movq(Address(rsp, 7 * wordSize), r8);
8734   movq(Address(rsp, 6 * wordSize), r9);
8735   movq(Address(rsp, 5 * wordSize), r10);
8736   movq(Address(rsp, 4 * wordSize), r11);
8737   movq(Address(rsp, 3 * wordSize), r12);
8738   movq(Address(rsp, 2 * wordSize), r13);
8739   movq(Address(rsp, wordSize), r14);
8740   movq(Address(rsp, 0), r15);
8741 }
8742 
8743 void Assembler::pushq(Address src) {
8744   InstructionMark im(this);
8745   prefixq(src);
8746   emit_int8((unsigned char)0xFF);
8747   emit_operand(rsi, src);
8748 }
8749 
8750 void Assembler::rclq(Register dst, int imm8) {
8751   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8752   int encode = prefixq_and_encode(dst->encoding());
8753   if (imm8 == 1) {
8754     emit_int8((unsigned char)0xD1);
8755     emit_int8((unsigned char)(0xD0 | encode));
8756   } else {
8757     emit_int8((unsigned char)0xC1);
8758     emit_int8((unsigned char)(0xD0 | encode));
8759     emit_int8(imm8);
8760   }
8761 }
8762 
8763 void Assembler::rcrq(Register dst, int imm8) {
8764   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8765   int encode = prefixq_and_encode(dst->encoding());
8766   if (imm8 == 1) {
8767     emit_int8((unsigned char)0xD1);
8768     emit_int8((unsigned char)(0xD8 | encode));
8769   } else {
8770     emit_int8((unsigned char)0xC1);
8771     emit_int8((unsigned char)(0xD8 | encode));
8772     emit_int8(imm8);
8773   }
8774 }
8775 
8776 void Assembler::rorq(Register dst, int imm8) {
8777   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8778   int encode = prefixq_and_encode(dst->encoding());
8779   if (imm8 == 1) {
8780     emit_int8((unsigned char)0xD1);
8781     emit_int8((unsigned char)(0xC8 | encode));
8782   } else {
8783     emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xC8 | encode));
8785     emit_int8(imm8);
8786   }
8787 }
8788 
8789 void Assembler::rorxq(Register dst, Register src, int imm8) {
8790   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8791   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8792   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8793   emit_int8((unsigned char)0xF0);
8794   emit_int8((unsigned char)(0xC0 | encode));
8795   emit_int8(imm8);
8796 }
8797 
8798 void Assembler::rorxd(Register dst, Register src, int imm8) {
8799   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8800   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8801   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8802   emit_int8((unsigned char)0xF0);
8803   emit_int8((unsigned char)(0xC0 | encode));
8804   emit_int8(imm8);
8805 }
8806 
8807 void Assembler::sarq(Register dst, int imm8) {
8808   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8809   int encode = prefixq_and_encode(dst->encoding());
8810   if (imm8 == 1) {
8811     emit_int8((unsigned char)0xD1);
8812     emit_int8((unsigned char)(0xF8 | encode));
8813   } else {
8814     emit_int8((unsigned char)0xC1);
8815     emit_int8((unsigned char)(0xF8 | encode));
8816     emit_int8(imm8);
8817   }
8818 }
8819 
8820 void Assembler::sarq(Register dst) {
8821   int encode = prefixq_and_encode(dst->encoding());
8822   emit_int8((unsigned char)0xD3);
8823   emit_int8((unsigned char)(0xF8 | encode));
8824 }
8825 
8826 void Assembler::sbbq(Address dst, int32_t imm32) {
8827   InstructionMark im(this);
8828   prefixq(dst);
8829   emit_arith_operand(0x81, rbx, dst, imm32);
8830 }
8831 
8832 void Assembler::sbbq(Register dst, int32_t imm32) {
8833   (void) prefixq_and_encode(dst->encoding());
8834   emit_arith(0x81, 0xD8, dst, imm32);
8835 }
8836 
8837 void Assembler::sbbq(Register dst, Address src) {
8838   InstructionMark im(this);
8839   prefixq(src, dst);
8840   emit_int8(0x1B);
8841   emit_operand(dst, src);
8842 }
8843 
8844 void Assembler::sbbq(Register dst, Register src) {
8845   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8846   emit_arith(0x1B, 0xC0, dst, src);
8847 }
8848 
8849 void Assembler::shlq(Register dst, int imm8) {
8850   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8851   int encode = prefixq_and_encode(dst->encoding());
8852   if (imm8 == 1) {
8853     emit_int8((unsigned char)0xD1);
8854     emit_int8((unsigned char)(0xE0 | encode));
8855   } else {
8856     emit_int8((unsigned char)0xC1);
8857     emit_int8((unsigned char)(0xE0 | encode));
8858     emit_int8(imm8);
8859   }
8860 }
8861 
8862 void Assembler::shlq(Register dst) {
8863   int encode = prefixq_and_encode(dst->encoding());
8864   emit_int8((unsigned char)0xD3);
8865   emit_int8((unsigned char)(0xE0 | encode));
8866 }
8867 
8868 void Assembler::shrq(Register dst, int imm8) {
8869   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8870   int encode = prefixq_and_encode(dst->encoding());
8871   emit_int8((unsigned char)0xC1);
8872   emit_int8((unsigned char)(0xE8 | encode));
8873   emit_int8(imm8);
8874 }
8875 
8876 void Assembler::shrq(Register dst) {
8877   int encode = prefixq_and_encode(dst->encoding());
8878   emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
8880 }
8881 
8882 void Assembler::subq(Address dst, int32_t imm32) {
8883   InstructionMark im(this);
8884   prefixq(dst);
8885   emit_arith_operand(0x81, rbp, dst, imm32);
8886 }
8887 
8888 void Assembler::subq(Address dst, Register src) {
8889   InstructionMark im(this);
8890   prefixq(dst, src);
8891   emit_int8(0x29);
8892   emit_operand(src, dst);
8893 }
8894 
8895 void Assembler::subq(Register dst, int32_t imm32) {
8896   (void) prefixq_and_encode(dst->encoding());
8897   emit_arith(0x81, 0xE8, dst, imm32);
8898 }
8899 
// Force generation of a 4-byte immediate value even if it fits into 8 bits
8901 void Assembler::subq_imm32(Register dst, int32_t imm32) {
8902   (void) prefixq_and_encode(dst->encoding());
8903   emit_arith_imm32(0x81, 0xE8, dst, imm32);
8904 }
8905 
8906 void Assembler::subq(Register dst, Address src) {
8907   InstructionMark im(this);
8908   prefixq(src, dst);
8909   emit_int8(0x2B);
8910   emit_operand(dst, src);
8911 }
8912 
8913 void Assembler::subq(Register dst, Register src) {
8914   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8915   emit_arith(0x2B, 0xC0, dst, src);
8916 }
8917 
8918 void Assembler::testq(Register dst, int32_t imm32) {
8919   // not using emit_arith because test
8920   // doesn't support sign-extension of
8921   // 8bit operands
8922   int encode = dst->encoding();
8923   if (encode == 0) {
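    // rax can use the short-form, rax-only encoding: REX.W A9 id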
8924     prefix(REX_W);
8925     emit_int8((unsigned char)0xA9);
8926   } else {
8927     encode = prefixq_and_encode(encode);
8928     emit_int8((unsigned char)0xF7);
8929     emit_int8((unsigned char)(0xC0 | encode));
8930   }
8931   emit_int32(imm32);
8932 }
8933 
8934 void Assembler::testq(Register dst, Register src) {
8935   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8936   emit_arith(0x85, 0xC0, dst, src);
8937 }
8938 
8939 void Assembler::xaddq(Address dst, Register src) {
8940   InstructionMark im(this);
8941   prefixq(dst, src);
8942   emit_int8(0x0F);
8943   emit_int8((unsigned char)0xC1);
8944   emit_operand(src, dst);
8945 }
8946 
8947 void Assembler::xchgq(Register dst, Address src) {
8948   InstructionMark im(this);
8949   prefixq(src, dst);
8950   emit_int8((unsigned char)0x87);
8951   emit_operand(dst, src);
8952 }
8953 
8954 void Assembler::xchgq(Register dst, Register src) {
8955   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8956   emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
8958 }
8959 
8960 void Assembler::xorq(Register dst, Register src) {
8961   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8962   emit_arith(0x33, 0xC0, dst, src);
8963 }
8964 
8965 void Assembler::xorq(Register dst, Address src) {
8966   InstructionMark im(this);
8967   prefixq(src, dst);
8968   emit_int8(0x33);
8969   emit_operand(dst, src);
8970 }
8971 
8972 #endif // !LP64