1 /*
   2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "gc/shared/cardTableBarrierSet.hpp"
  29 #include "gc/shared/collectedHeap.inline.hpp"
  30 #include "interpreter/interpreter.hpp"
  31 #include "memory/resourceArea.hpp"
  32 #include "prims/methodHandles.hpp"
  33 #include "runtime/biasedLocking.hpp"
  34 #include "runtime/objectMonitor.hpp"
  35 #include "runtime/os.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/stubRoutines.hpp"
  38 #include "utilities/macros.hpp"
  39 #if INCLUDE_ALL_GCS
  40 #include "gc/g1/g1BarrierSet.hpp"
  41 #include "gc/g1/g1CollectedHeap.inline.hpp"
  42 #include "gc/g1/heapRegion.hpp"
  43 #endif // INCLUDE_ALL_GCS
  44 
  45 #ifdef PRODUCT
  46 #define BLOCK_COMMENT(str) /* nothing */
  47 #define STOP(error) stop(error)
  48 #else
  49 #define BLOCK_COMMENT(str) block_comment(str)
  50 #define STOP(error) block_comment(error); stop(error)
  51 #endif
  52 
  53 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  54 // Implementation of AddressLiteral
  55 
  56 // A 2-D table for managing compressed displacement(disp8) on EVEX enabled platforms.
  57 unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  58   // -----------------Table 4.5 -------------------- //
  59   16, 32, 64,  // EVEX_FV(0)
  60   4,  4,  4,   // EVEX_FV(1) - with Evex.b
  61   16, 32, 64,  // EVEX_FV(2) - with Evex.w
  62   8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  63   8,  16, 32,  // EVEX_HV(0)
  64   4,  4,  4,   // EVEX_HV(1) - with Evex.b
  65   // -----------------Table 4.6 -------------------- //
  66   16, 32, 64,  // EVEX_FVM(0)
  67   1,  1,  1,   // EVEX_T1S(0)
  68   2,  2,  2,   // EVEX_T1S(1)
  69   4,  4,  4,   // EVEX_T1S(2)
  70   8,  8,  8,   // EVEX_T1S(3)
  71   4,  4,  4,   // EVEX_T1F(0)
  72   8,  8,  8,   // EVEX_T1F(1)
  73   8,  8,  8,   // EVEX_T2(0)
  74   0,  16, 16,  // EVEX_T2(1)
  75   0,  16, 16,  // EVEX_T4(0)
  76   0,  0,  32,  // EVEX_T4(1)
  77   0,  0,  32,  // EVEX_T8(0)
  78   8,  16, 32,  // EVEX_HVM(0)
  79   4,  8,  16,  // EVEX_QVM(0)
  80   2,  4,  8,   // EVEX_OVM(0)
  81   16, 16, 16,  // EVEX_M128(0)
  82   8,  32, 64,  // EVEX_DUP(0)
  83   0,  0,  0    // EVEX_NTUP
  84 };
  85 
  86 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  87   _is_lval = false;
  88   _target = target;
  89   switch (rtype) {
  90   case relocInfo::oop_type:
  91   case relocInfo::metadata_type:
  92     // Oops are a special case. Normally they would be their own section
  93     // but in cases like icBuffer they are literals in the code stream that
  94     // we don't have a section for. We use none so that we get a literal address
  95     // which is always patchable.
  96     break;
  97   case relocInfo::external_word_type:
  98     _rspec = external_word_Relocation::spec(target);
  99     break;
 100   case relocInfo::internal_word_type:
 101     _rspec = internal_word_Relocation::spec(target);
 102     break;
 103   case relocInfo::opt_virtual_call_type:
 104     _rspec = opt_virtual_call_Relocation::spec();
 105     break;
 106   case relocInfo::static_call_type:
 107     _rspec = static_call_Relocation::spec();
 108     break;
 109   case relocInfo::runtime_call_type:
 110     _rspec = runtime_call_Relocation::spec();
 111     break;
 112   case relocInfo::poll_type:
 113   case relocInfo::poll_return_type:
 114     _rspec = Relocation::spec_simple(rtype);
 115     break;
 116   case relocInfo::none:
 117     break;
 118   default:
 119     ShouldNotReachHere();
 120     break;
 121   }
 122 }
 123 
 124 // Implementation of Address
 125 
 126 #ifdef _LP64
 127 
 128 Address Address::make_array(ArrayAddress adr) {
 129   // Not implementable on 64bit machines
 130   // Should have been handled higher up the call chain.
 131   ShouldNotReachHere();
 132   return Address();
 133 }
 134 
 135 // exceedingly dangerous constructor
 136 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
 137   _base  = noreg;
 138   _index = noreg;
 139   _scale = no_scale;
 140   _disp  = disp;
 141   switch (rtype) {
 142     case relocInfo::external_word_type:
 143       _rspec = external_word_Relocation::spec(loc);
 144       break;
 145     case relocInfo::internal_word_type:
 146       _rspec = internal_word_Relocation::spec(loc);
 147       break;
 148     case relocInfo::runtime_call_type:
 149       // HMM
 150       _rspec = runtime_call_Relocation::spec();
 151       break;
 152     case relocInfo::poll_type:
 153     case relocInfo::poll_return_type:
 154       _rspec = Relocation::spec_simple(rtype);
 155       break;
 156     case relocInfo::none:
 157       break;
 158     default:
 159       ShouldNotReachHere();
 160   }
 161 }
 162 #else // LP64
 163 
 164 Address Address::make_array(ArrayAddress adr) {
 165   AddressLiteral base = adr.base();
 166   Address index = adr.index();
 167   assert(index._disp == 0, "must not have disp"); // maybe it can?
 168   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
 169   array._rspec = base._rspec;
 170   return array;
 171 }
 172 
 173 // exceedingly dangerous constructor
 174 Address::Address(address loc, RelocationHolder spec) {
 175   _base  = noreg;
 176   _index = noreg;
 177   _scale = no_scale;
 178   _disp  = (intptr_t) loc;
 179   _rspec = spec;
 180 }
 181 
 182 #endif // _LP64
 183 
 184 
 185 
 186 // Convert the raw encoding form into the form expected by the constructor for
 187 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 188 // that to noreg for the Address constructor.
 189 Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
 190   RelocationHolder rspec;
 191   if (disp_reloc != relocInfo::none) {
 192     rspec = Relocation::spec_simple(disp_reloc);
 193   }
 194   bool valid_index = index != rsp->encoding();
 195   if (valid_index) {
 196     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 197     madr._rspec = rspec;
 198     return madr;
 199   } else {
 200     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 201     madr._rspec = rspec;
 202     return madr;
 203   }
 204 }
 205 
 206 // Implementation of Assembler
 207 
 208 int AbstractAssembler::code_fill_byte() {
 209   return (u_char)'\xF4'; // hlt
 210 }
 211 
 212 // make this go away someday
 213 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
 214   if (rtype == relocInfo::none)
 215     emit_int32(data);
 216   else
 217     emit_data(data, Relocation::spec_simple(rtype), format);
 218 }
 219 
 220 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
 221   assert(imm_operand == 0, "default format must be immediate in this file");
 222   assert(inst_mark() != NULL, "must be inside InstructionMark");
 223   if (rspec.type() !=  relocInfo::none) {
 224     #ifdef ASSERT
 225       check_relocation(rspec, format);
 226     #endif
 227     // Do not use AbstractAssembler::relocate, which is not intended for
 228     // embedded words.  Instead, relocate to the enclosing instruction.
 229 
 230     // hack. call32 is too wide for mask so use disp32
 231     if (format == call32_operand)
 232       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 233     else
 234       code_section()->relocate(inst_mark(), rspec, format);
 235   }
 236   emit_int32(data);
 237 }
 238 
 239 static int encode(Register r) {
 240   int enc = r->encoding();
 241   if (enc >= 8) {
 242     enc -= 8;
 243   }
 244   return enc;
 245 }
 246 
// Emit an 8-bit arithmetic instruction with an immediate byte:
// opcode, then ModRM (op2 supplies the mod/reg bits, dst fills r/m),
// then the imm8.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation"); // opcode bit 0 clear => byte-sized op
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}
 256 
 257 
 258 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
 259   assert(isByte(op1) && isByte(op2), "wrong opcode");
 260   assert((op1 & 0x01) == 1, "should be 32bit operation");
 261   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 262   if (is8bit(imm32)) {
 263     emit_int8(op1 | 0x02); // set sign bit
 264     emit_int8(op2 | encode(dst));
 265     emit_int8(imm32 & 0xFF);
 266   } else {
 267     emit_int8(op1);
 268     emit_int8(op2 | encode(dst));
 269     emit_int32(imm32);
 270   }
 271 }
 272 
// Force generation of a 4 byte immediate value even if it fits into 8bit
// (so the immediate occupies a fixed, predictable number of bytes).
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));  // ModRM: op2 carries mod/reg bits, dst fills r/m
  emit_int32(imm32);             // always all four immediate bytes, never the imm8 form
}
 282 
 283 // immediate-to-memory forms
 284 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
 285   assert((op1 & 0x01) == 1, "should be 32bit operation");
 286   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 287   if (is8bit(imm32)) {
 288     emit_int8(op1 | 0x02); // set sign bit
 289     emit_operand(rm, adr, 1);
 290     emit_int8(imm32 & 0xFF);
 291   } else {
 292     emit_int8(op1);
 293     emit_operand(rm, adr, 4);
 294     emit_int32(imm32);
 295   }
 296 }
 297 
 298 
 299 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 300   assert(isByte(op1) && isByte(op2), "wrong opcode");
 301   emit_int8(op1);
 302   emit_int8(op2 | encode(dst) << 3 | encode(src));
 303 }
 304 
 305 
 306 bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
 307                                            int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
 308   int mod_idx = 0;
 309   // We will test if the displacement fits the compressed format and if so
 310   // apply the compression to the displacment iff the result is8bit.
 311   if (VM_Version::supports_evex() && is_evex_inst) {
 312     switch (cur_tuple_type) {
 313     case EVEX_FV:
 314       if ((cur_encoding & VEX_W) == VEX_W) {
 315         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
 316       } else {
 317         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 318       }
 319       break;
 320 
 321     case EVEX_HV:
 322       mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 323       break;
 324 
 325     case EVEX_FVM:
 326       break;
 327 
 328     case EVEX_T1S:
 329       switch (in_size_in_bits) {
 330       case EVEX_8bit:
 331         break;
 332 
 333       case EVEX_16bit:
 334         mod_idx = 1;
 335         break;
 336 
 337       case EVEX_32bit:
 338         mod_idx = 2;
 339         break;
 340 
 341       case EVEX_64bit:
 342         mod_idx = 3;
 343         break;
 344       }
 345       break;
 346 
 347     case EVEX_T1F:
 348     case EVEX_T2:
 349     case EVEX_T4:
 350       mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
 351       break;
 352 
 353     case EVEX_T8:
 354       break;
 355 
 356     case EVEX_HVM:
 357       break;
 358 
 359     case EVEX_QVM:
 360       break;
 361 
 362     case EVEX_OVM:
 363       break;
 364 
 365     case EVEX_M128:
 366       break;
 367 
 368     case EVEX_DUP:
 369       break;
 370 
 371     default:
 372       assert(0, "no valid evex tuple_table entry");
 373       break;
 374     }
 375 
 376     if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
 377       int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
 378       if ((disp % disp_factor) == 0) {
 379         int new_disp = disp / disp_factor;
 380         if ((-0x80 <= new_disp && new_disp < 0x80)) {
 381           disp = new_disp;
 382         }
 383       } else {
 384         return false;
 385       }
 386     }
 387   }
 388   return (-0x80 <= disp && disp < 0x80);
 389 }
 390 
 391 
// Returns true if 'disp' can be emitted as a single signed byte.  For EVEX
// instructions, first tries AVX-512 disp8*N compression: when the raw
// displacement is an exact multiple of the tuple-dependent scaling factor
// from tuple_table and the scaled value fits in 8 bits, 'disp' is rewritten
// IN PLACE (through the reference) to the compressed value.  Returns false
// when the displacement is not a multiple of the factor.
bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8bit.
  if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
    int evex_encoding = _attributes->get_evex_encoding();
    int tuple_type = _attributes->get_tuple_type();
    // mod_idx selects the sub-row of tuple_table for this tuple type.
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      // Sub-row chosen by the input element size (8-bit keeps mod_idx 0).
      switch (_attributes->get_input_size()) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
      break;

    // The remaining tuple types have a single table row: mod_idx stays 0.
    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    int vector_len = _attributes->get_vector_len();
    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;  // rewrite caller's disp to the compressed value
        }
      } else {
        // Not a multiple of the scaling factor: disp8*N cannot represent it.
        return false;
      }
    }
  }
  return is8bit(disp);
}
 478 
 479 
// Emit the ModRM byte (plus optional SIB byte and displacement) encoding
// the memory operand [base + index*scale + disp] with 'reg' in the ModRM
// reg field.  Chooses the shortest encoding available: no displacement,
// disp8 (possibly EVEX-compressed via emit_compressed_disp_byte, which may
// rewrite 'disp' in place), or disp32.  A relocated displacement always
// forces the disp32 form so it can be patched.
// 'rip_relative_correction' accounts for extra instruction bytes (e.g. a
// trailing immediate) that follow the displacement when computing the
// RIP-relative offset on 64-bit.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // rbp/r13 as base cannot use the no-displacement form (that ModRM
      // pattern means disp32/RIP-relative instead), hence the exclusions.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp] -- rsp/r12 as base always requires a SIB byte.
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp] -- no SIB byte needed.
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp] -- always needs the full disp32.
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
 597 
 598 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 599                              Address::ScaleFactor scale, int disp,
 600                              RelocationHolder const& rspec) {
 601   if (UseAVX > 2) {
 602     int xreg_enc = reg->encoding();
 603     if (xreg_enc > 15) {
 604       XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
 605       emit_operand((Register)new_reg, base, index, scale, disp, rspec);
 606       return;
 607     }
 608   }
 609   emit_operand((Register)reg, base, index, scale, disp, rspec);
 610 }
 611 
 612 // Secret local extension to Assembler::WhichOperand:
 613 #define end_pc_operand (_WhichOperand_limit)
 614 
 615 address Assembler::locate_operand(address inst, WhichOperand which) {
 616   // Decode the given instruction, and return the address of
 617   // an embedded 32-bit operand word.
 618 
 619   // If "which" is disp32_operand, selects the displacement portion
 620   // of an effective address specifier.
 621   // If "which" is imm64_operand, selects the trailing immediate constant.
 622   // If "which" is call32_operand, selects the displacement of a call or jump.
 623   // Caller is responsible for ensuring that there is such an operand,
 624   // and that it is 32/64 bits wide.
 625 
 626   // If "which" is end_pc_operand, find the end of the instruction.
 627 
 628   address ip = inst;
 629   bool is_64bit = false;
 630 
 631   debug_only(bool has_disp32 = false);
 632   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
 633 
 634   again_after_prefix:
 635   switch (0xFF & *ip++) {
 636 
 637   // These convenience macros generate groups of "case" labels for the switch.
 638 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
 639 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
 640              case (x)+4: case (x)+5: case (x)+6: case (x)+7
 641 #define REP16(x) REP8((x)+0): \
 642               case REP8((x)+8)
 643 
 644   case CS_segment:
 645   case SS_segment:
 646   case DS_segment:
 647   case ES_segment:
 648   case FS_segment:
 649   case GS_segment:
 650     // Seems dubious
 651     LP64_ONLY(assert(false, "shouldn't have that prefix"));
 652     assert(ip == inst+1, "only one prefix allowed");
 653     goto again_after_prefix;
 654 
 655   case 0x67:
 656   case REX:
 657   case REX_B:
 658   case REX_X:
 659   case REX_XB:
 660   case REX_R:
 661   case REX_RB:
 662   case REX_RX:
 663   case REX_RXB:
 664     NOT_LP64(assert(false, "64bit prefixes"));
 665     goto again_after_prefix;
 666 
 667   case REX_W:
 668   case REX_WB:
 669   case REX_WX:
 670   case REX_WXB:
 671   case REX_WR:
 672   case REX_WRB:
 673   case REX_WRX:
 674   case REX_WRXB:
 675     NOT_LP64(assert(false, "64bit prefixes"));
 676     is_64bit = true;
 677     goto again_after_prefix;
 678 
 679   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
 680   case 0x88: // movb a, r
 681   case 0x89: // movl a, r
 682   case 0x8A: // movb r, a
 683   case 0x8B: // movl r, a
 684   case 0x8F: // popl a
 685     debug_only(has_disp32 = true);
 686     break;
 687 
 688   case 0x68: // pushq #32
 689     if (which == end_pc_operand) {
 690       return ip + 4;
 691     }
 692     assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
 693     return ip;                  // not produced by emit_operand
 694 
 695   case 0x66: // movw ... (size prefix)
 696     again_after_size_prefix2:
 697     switch (0xFF & *ip++) {
 698     case REX:
 699     case REX_B:
 700     case REX_X:
 701     case REX_XB:
 702     case REX_R:
 703     case REX_RB:
 704     case REX_RX:
 705     case REX_RXB:
 706     case REX_W:
 707     case REX_WB:
 708     case REX_WX:
 709     case REX_WXB:
 710     case REX_WR:
 711     case REX_WRB:
 712     case REX_WRX:
 713     case REX_WRXB:
 714       NOT_LP64(assert(false, "64bit prefix found"));
 715       goto again_after_size_prefix2;
 716     case 0x8B: // movw r, a
 717     case 0x89: // movw a, r
 718       debug_only(has_disp32 = true);
 719       break;
 720     case 0xC7: // movw a, #16
 721       debug_only(has_disp32 = true);
 722       tail_size = 2;  // the imm16
 723       break;
 724     case 0x0F: // several SSE/SSE2 variants
 725       ip--;    // reparse the 0x0F
 726       goto again_after_prefix;
 727     default:
 728       ShouldNotReachHere();
 729     }
 730     break;
 731 
 732   case REP8(0xB8): // movl/q r, #32/#64(oop?)
 733     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
 734     // these asserts are somewhat nonsensical
 735 #ifndef _LP64
 736     assert(which == imm_operand || which == disp32_operand,
 737            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 738 #else
 739     assert((which == call32_operand || which == imm_operand) && is_64bit ||
 740            which == narrow_oop_operand && !is_64bit,
 741            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 742 #endif // _LP64
 743     return ip;
 744 
 745   case 0x69: // imul r, a, #32
 746   case 0xC7: // movl a, #32(oop?)
 747     tail_size = 4;
 748     debug_only(has_disp32 = true); // has both kinds of operands!
 749     break;
 750 
 751   case 0x0F: // movx..., etc.
 752     switch (0xFF & *ip++) {
 753     case 0x3A: // pcmpestri
 754       tail_size = 1;
 755     case 0x38: // ptest, pmovzxbw
 756       ip++; // skip opcode
 757       debug_only(has_disp32 = true); // has both kinds of operands!
 758       break;
 759 
 760     case 0x70: // pshufd r, r/a, #8
 761       debug_only(has_disp32 = true); // has both kinds of operands!
 762     case 0x73: // psrldq r, #8
 763       tail_size = 1;
 764       break;
 765 
 766     case 0x12: // movlps
 767     case 0x28: // movaps
 768     case 0x2E: // ucomiss
 769     case 0x2F: // comiss
 770     case 0x54: // andps
 771     case 0x55: // andnps
 772     case 0x56: // orps
 773     case 0x57: // xorps
 774     case 0x58: // addpd
 775     case 0x59: // mulpd
 776     case 0x6E: // movd
 777     case 0x7E: // movd
 778     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
 779     case 0xFE: // paddd
 780       debug_only(has_disp32 = true);
 781       break;
 782 
 783     case 0xAD: // shrd r, a, %cl
 784     case 0xAF: // imul r, a
 785     case 0xBE: // movsbl r, a (movsxb)
 786     case 0xBF: // movswl r, a (movsxw)
 787     case 0xB6: // movzbl r, a (movzxb)
 788     case 0xB7: // movzwl r, a (movzxw)
 789     case REP16(0x40): // cmovl cc, r, a
 790     case 0xB0: // cmpxchgb
 791     case 0xB1: // cmpxchg
 792     case 0xC1: // xaddl
 793     case 0xC7: // cmpxchg8
 794     case REP16(0x90): // setcc a
 795       debug_only(has_disp32 = true);
 796       // fall out of the switch to decode the address
 797       break;
 798 
 799     case 0xC4: // pinsrw r, a, #8
 800       debug_only(has_disp32 = true);
 801     case 0xC5: // pextrw r, r, #8
 802       tail_size = 1;  // the imm8
 803       break;
 804 
 805     case 0xAC: // shrd r, a, #8
 806       debug_only(has_disp32 = true);
 807       tail_size = 1;  // the imm8
 808       break;
 809 
 810     case REP16(0x80): // jcc rdisp32
 811       if (which == end_pc_operand)  return ip + 4;
 812       assert(which == call32_operand, "jcc has no disp32 or imm");
 813       return ip;
 814     default:
 815       ShouldNotReachHere();
 816     }
 817     break;
 818 
 819   case 0x81: // addl a, #32; addl r, #32
 820     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 821     // on 32bit in the case of cmpl, the imm might be an oop
 822     tail_size = 4;
 823     debug_only(has_disp32 = true); // has both kinds of operands!
 824     break;
 825 
 826   case 0x83: // addl a, #8; addl r, #8
 827     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 828     debug_only(has_disp32 = true); // has both kinds of operands!
 829     tail_size = 1;
 830     break;
 831 
 832   case 0x9B:
 833     switch (0xFF & *ip++) {
 834     case 0xD9: // fnstcw a
 835       debug_only(has_disp32 = true);
 836       break;
 837     default:
 838       ShouldNotReachHere();
 839     }
 840     break;
 841 
 842   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
 843   case REP4(0x10): // adc...
 844   case REP4(0x20): // and...
 845   case REP4(0x30): // xor...
 846   case REP4(0x08): // or...
 847   case REP4(0x18): // sbb...
 848   case REP4(0x28): // sub...
 849   case 0xF7: // mull a
 850   case 0x8D: // lea r, a
 851   case 0x87: // xchg r, a
 852   case REP4(0x38): // cmp...
 853   case 0x85: // test r, a
 854     debug_only(has_disp32 = true); // has both kinds of operands!
 855     break;
 856 
 857   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
 858   case 0xC6: // movb a, #8
 859   case 0x80: // cmpb a, #8
 860   case 0x6B: // imul r, a, #8
 861     debug_only(has_disp32 = true); // has both kinds of operands!
 862     tail_size = 1; // the imm8
 863     break;
 864 
 865   case 0xC4: // VEX_3bytes
 866   case 0xC5: // VEX_2bytes
 867     assert((UseAVX > 0), "shouldn't have VEX prefix");
 868     assert(ip == inst+1, "no prefixes allowed");
 869     // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
 870     // but they have prefix 0x0F and processed when 0x0F processed above.
 871     //
 872     // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
 873     // instructions (these instructions are not supported in 64-bit mode).
 874     // To distinguish them bits [7:6] are set in the VEX second byte since
 875     // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
 876     // those VEX bits REX and vvvv bits are inverted.
 877     //
 878     // Fortunately C2 doesn't generate these instructions so we don't need
 879     // to check for them in product version.
 880 
 881     // Check second byte
 882     NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
 883 
 884     int vex_opcode;
 885     // First byte
 886     if ((0xFF & *inst) == VEX_3bytes) {
 887       vex_opcode = VEX_OPCODE_MASK & *ip;
 888       ip++; // third byte
 889       is_64bit = ((VEX_W & *ip) == VEX_W);
 890     } else {
 891       vex_opcode = VEX_OPCODE_0F;
 892     }
 893     ip++; // opcode
 894     // To find the end of instruction (which == end_pc_operand).
 895     switch (vex_opcode) {
 896       case VEX_OPCODE_0F:
 897         switch (0xFF & *ip) {
 898         case 0x70: // pshufd r, r/a, #8
 899         case 0x71: // ps[rl|ra|ll]w r, #8
 900         case 0x72: // ps[rl|ra|ll]d r, #8
 901         case 0x73: // ps[rl|ra|ll]q r, #8
 902         case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
 903         case 0xC4: // pinsrw r, r, r/a, #8
 904         case 0xC5: // pextrw r/a, r, #8
 905         case 0xC6: // shufp[s|d] r, r, r/a, #8
 906           tail_size = 1;  // the imm8
 907           break;
 908         }
 909         break;
 910       case VEX_OPCODE_0F_3A:
 911         tail_size = 1;
 912         break;
 913     }
 914     ip++; // skip opcode
 915     debug_only(has_disp32 = true); // has both kinds of operands!
 916     break;
 917 
 918   case 0x62: // EVEX_4bytes
 919     assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
 920     assert(ip == inst+1, "no prefixes allowed");
 921     // no EVEX collisions, all instructions that have 0x62 opcodes
 922     // have EVEX versions and are subopcodes of 0x66
 923     ip++; // skip P0 and exmaine W in P1
 924     is_64bit = ((VEX_W & *ip) == VEX_W);
 925     ip++; // move to P2
 926     ip++; // skip P2, move to opcode
 927     // To find the end of instruction (which == end_pc_operand).
 928     switch (0xFF & *ip) {
 929     case 0x22: // pinsrd r, r/a, #8
 930     case 0x61: // pcmpestri r, r/a, #8
 931     case 0x70: // pshufd r, r/a, #8
 932     case 0x73: // psrldq r, #8
 933       tail_size = 1;  // the imm8
 934       break;
 935     default:
 936       break;
 937     }
 938     ip++; // skip opcode
 939     debug_only(has_disp32 = true); // has both kinds of operands!
 940     break;
 941 
 942   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 943   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 944   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 945   case 0xDD: // fld_d a; fst_d a; fstp_d a
 946   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 947   case 0xDF: // fild_d a; fistp_d a
 948   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 949   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 950   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 951     debug_only(has_disp32 = true);
 952     break;
 953 
 954   case 0xE8: // call rdisp32
 955   case 0xE9: // jmp  rdisp32
 956     if (which == end_pc_operand)  return ip + 4;
 957     assert(which == call32_operand, "call has no disp32 or imm");
 958     return ip;
 959 
 960   case 0xF0:                    // Lock
 961     assert(os::is_MP(), "only on MP");
 962     goto again_after_prefix;
 963 
 964   case 0xF3:                    // For SSE
 965   case 0xF2:                    // For SSE2
 966     switch (0xFF & *ip++) {
 967     case REX:
 968     case REX_B:
 969     case REX_X:
 970     case REX_XB:
 971     case REX_R:
 972     case REX_RB:
 973     case REX_RX:
 974     case REX_RXB:
 975     case REX_W:
 976     case REX_WB:
 977     case REX_WX:
 978     case REX_WXB:
 979     case REX_WR:
 980     case REX_WRB:
 981     case REX_WRX:
 982     case REX_WRXB:
 983       NOT_LP64(assert(false, "found 64bit prefix"));
 984       ip++;
 985     default:
 986       ip++;
 987     }
 988     debug_only(has_disp32 = true); // has both kinds of operands!
 989     break;
 990 
 991   default:
 992     ShouldNotReachHere();
 993 
 994 #undef REP8
 995 #undef REP16
 996   }
 997 
 998   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
 999 #ifdef _LP64
1000   assert(which != imm_operand, "instruction is not a movq reg, imm64");
1001 #else
1002   // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
1003   assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
1004 #endif // LP64
1005   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
1006 
1007   // parse the output of emit_operand
1008   int op2 = 0xFF & *ip++;
1009   int base = op2 & 0x07;
1010   int op3 = -1;
1011   const int b100 = 4;
1012   const int b101 = 5;
1013   if (base == b100 && (op2 >> 6) != 3) {
1014     op3 = 0xFF & *ip++;
1015     base = op3 & 0x07;   // refetch the base
1016   }
1017   // now ip points at the disp (if any)
1018 
1019   switch (op2 >> 6) {
1020   case 0:
1021     // [00 reg  100][ss index base]
1022     // [00 reg  100][00   100  esp]
1023     // [00 reg base]
1024     // [00 reg  100][ss index  101][disp32]
1025     // [00 reg  101]               [disp32]
1026 
1027     if (base == b101) {
1028       if (which == disp32_operand)
1029         return ip;              // caller wants the disp32
1030       ip += 4;                  // skip the disp32
1031     }
1032     break;
1033 
1034   case 1:
1035     // [01 reg  100][ss index base][disp8]
1036     // [01 reg  100][00   100  esp][disp8]
1037     // [01 reg base]               [disp8]
1038     ip += 1;                    // skip the disp8
1039     break;
1040 
1041   case 2:
1042     // [10 reg  100][ss index base][disp32]
1043     // [10 reg  100][00   100  esp][disp32]
1044     // [10 reg base]               [disp32]
1045     if (which == disp32_operand)
1046       return ip;                // caller wants the disp32
1047     ip += 4;                    // skip the disp32
1048     break;
1049 
1050   case 3:
1051     // [11 reg base]  (not a memory addressing mode)
1052     break;
1053   }
1054 
1055   if (which == end_pc_operand) {
1056     return ip + tail_size;
1057   }
1058 
1059 #ifdef _LP64
1060   assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
1061 #else
1062   assert(which == imm_operand, "instruction has only an imm field");
1063 #endif // LP64
1064   return ip;
1065 }
1066 
1067 address Assembler::locate_next_instruction(address inst) {
1068   // Secretly share code with locate_operand:
1069   return locate_operand(inst, end_pc_operand);
1070 }
1071 
1072 
1073 #ifdef ASSERT
1074 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
1075   address inst = inst_mark();
1076   assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
1077   address opnd;
1078 
1079   Relocation* r = rspec.reloc();
1080   if (r->type() == relocInfo::none) {
1081     return;
1082   } else if (r->is_call() || format == call32_operand) {
1083     // assert(format == imm32_operand, "cannot specify a nonzero format");
1084     opnd = locate_operand(inst, call32_operand);
1085   } else if (r->is_data()) {
1086     assert(format == imm_operand || format == disp32_operand
1087            LP64_ONLY(|| format == narrow_oop_operand), "format ok");
1088     opnd = locate_operand(inst, (WhichOperand)format);
1089   } else {
1090     assert(format == imm_operand, "cannot specify a format");
1091     return;
1092   }
1093   assert(opnd == pc(), "must put operand where relocs can find it");
1094 }
1095 #endif // ASSERT
1096 
// Emit ModRM/SIB/displacement for a form restricted to the low eight
// registers: neither reg nor the address components may need a REX prefix.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
1103 
// Emit ModRM/SIB/displacement for reg, adr.  rip_relative_correction is
// passed through to account for bytes emitted after the displacement
// (e.g. a trailing immediate) when computing RIP-relative offsets.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}
1110 
// Emit ModRM/SIB/displacement with an XMM register in the reg field.
void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
1115 
1116 // MMX operations
// MMX operations
// MMX registers share encodings 0-7, so the MMX register is emitted via the
// general-purpose path; extended (REX) address components are not allowed.
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
1121 
1122 // work around gcc (3.2.1-7a) bug
// work around gcc (3.2.1-7a) bug
// Same as the (MMXRegister, Address) overload; only the parameter order
// differs, to sidestep the compiler bug noted above.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
1127 
1128 
1129 void Assembler::emit_farith(int b1, int b2, int i) {
1130   assert(isByte(b1) && isByte(b2), "wrong opcode");
1131   assert(0 <= i &&  i < 8, "illegal stack offset");
1132   emit_int8(b1);
1133   emit_int8(b2 + i);
1134 }
1135 
1136 
1137 // Now the Assembler instructions (identical for 32/64 bits)
1138 
// adc m32, imm32 -- add with carry into memory (0x81 group, /2 via rdx).
void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}
1144 
// adc m32, r32 -- add register plus carry into memory (opcode 0x11 /r).
void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}
1151 
// adc r32, imm32 -- 0xD0 selects the ADC register form in emit_arith.
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}
1156 
// adc r32, m32 -- add memory plus carry into register (opcode 0x13 /r).
void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}
1163 
// adc r32, r32 -- register-register add with carry (opcode 0x13).
void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
1168 
// add m32, imm32 -- 0x81 group, /0 selected via rax.
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}
1174 
// add r8, r8 -- byte-register add (opcode 0x02).
void Assembler::addb(Register dst, Register src) {
  (void)prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x02, 0xC0, dst, src);
}
1179 
// add m8, imm8 -- 0x80 group; the trailing 1 tells emit_operand that one
// immediate byte follows the operand bytes.
void Assembler::addb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}
1187 
// add r16, r16 -- NOTE(review): no 0x66 operand-size prefix is emitted here,
// unlike addw(Address, int); callers appear to rely on that -- confirm.
void Assembler::addw(Register dst, Register src) {
  (void)prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
1192 
// add m16, imm16 -- 0x66 operand-size prefix, then 0x81 group; the trailing 2
// tells emit_operand that two immediate bytes follow.
void Assembler::addw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}
1201 
// add m32, r32 -- opcode 0x01 /r.
void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}
1208 
// add r32, imm32 -- 0xC0 selects the ADD register form in emit_arith.
void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}
1213 
// add r32, m32 -- opcode 0x03 /r.
void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}
1220 
// add r32, r32 -- register-register add (opcode 0x03).
void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
1225 
// Emit a 4-byte multi-byte NOP (0F 1F /0 with a disp8 addressing form).
void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}
1234 
// Emit a 5-byte multi-byte NOP (0F 1F /0 with SIB and a disp8).
void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}
1244 
// Emit a 7-byte multi-byte NOP (0F 1F /0 with a disp32 addressing form).
void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
1254 
// Emit an 8-byte multi-byte NOP (0F 1F /0 with SIB and a disp32).
void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
1265 
// addsd xmm, xmm -- scalar double-precision add (F2 0F 58 /r).
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
1274 
// addsd xmm, m64 -- scalar double-precision add from memory (F2 0F 58 /r).
void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
1285 
// addss xmm, xmm -- scalar single-precision add (F3 0F 58 /r).
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
1293 
// addss xmm, m32 -- scalar single-precision add from memory (F3 0F 58 /r).
void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
1303 
// aesdec xmm, m128 -- one AES decryption round (66 0F 38 DE /r).
void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}
1312 
1313 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1314   assert(VM_Version::supports_aes(), "");
1315   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1316   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1317   emit_int8((unsigned char)0xDE);
1318   emit_int8(0xC0 | encode);
1319 }
1320 
// aesdeclast xmm, m128 -- final AES decryption round (66 0F 38 DF /r).
void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}
1329 
// aesdeclast xmm, xmm -- final AES decryption round (66 0F 38 DF /r).
void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}
1337 
// aesenc xmm, m128 -- one AES encryption round (66 0F 38 DC /r).
void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}
1346 
1347 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1348   assert(VM_Version::supports_aes(), "");
1349   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1350   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1351   emit_int8((unsigned char)0xDC);
1352   emit_int8(0xC0 | encode);
1353 }
1354 
// aesenclast xmm, m128 -- final AES encryption round (66 0F 38 DD /r).
void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}
1363 
// aesenclast xmm, xmm -- final AES encryption round (66 0F 38 DD /r).
void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}
1371 
// and r8, r8 -- byte-register AND (opcode 0x22).
void Assembler::andb(Register dst, Register src) {
  (void)prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x22, 0xC0, dst, src);
}
1376 
// and r16, r16 -- NOTE(review): no 0x66 operand-size prefix emitted here,
// same pattern as addw(Register, Register) -- confirm callers expect this.
void Assembler::andw(Register dst, Register src) {
  (void)prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
1381 
1382 void Assembler::andl(Address dst, int32_t imm32) {
1383   InstructionMark im(this);
1384   prefix(dst);
1385   emit_int8((unsigned char)0x81);
1386   emit_operand(rsp, dst, 4);
1387   emit_int32(imm32);
1388 }
1389 
// and r32, imm32 -- 0xE0 selects the AND register form in emit_arith.
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}
1394 
// and r32, m32 -- opcode 0x23 /r.
void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}
1401 
// and r32, r32 -- register-register AND (opcode 0x23).
void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
1406 
// andn r32, r32, r32 -- BMI1 dst = ~src1 & src2 (VEX.0F38 0xF2).
void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}
1414 
// andn r32, r32, m32 -- BMI1 dst = ~src1 & mem (VEX.0F38 0xF2).
void Assembler::andnl(Register dst, Register src1, Address src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}
1423 
// bsf r32, r32 -- bit scan forward (0F BC /r).
void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}
1430 
// bsr r32, r32 -- bit scan reverse (0F BD /r).
void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}
1437 
// bswap r32 -- the register is folded into the opcode byte (0F C8+r).
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}
1443 
// blsi r32, r32 -- BMI1 extract lowest set bit; rbx encodes the /3 extension.
void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
1451 
// blsi r32, m32 -- BMI1 extract lowest set bit from memory (/3 via rbx).
void Assembler::blsil(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}
1460 
// blsmsk r32, r32 -- BMI1 mask up to lowest set bit; rdx encodes /2.
void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
1468 
// blsmsk r32, m32 -- BMI1 mask up to lowest set bit from memory (/2 via rdx).
void Assembler::blsmskl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}
1477 
// blsr r32, r32 -- BMI1 reset lowest set bit; rcx encodes /1.
void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
1485 
// blsr r32, m32 -- BMI1 reset lowest set bit from memory (/1 via rcx).
void Assembler::blsrl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}
1494 
// call to a Label: if the label is already bound, emit the (necessarily
// backward) rel32 call directly; otherwise emit a zero displacement and
// register a patch site so binding the label fixes it up later.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;  // E8 + rel32
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");  // bound labels are behind us
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    // displacement is relative to the end of the 5-byte instruction
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);  // placeholder, patched at bind time
  }
}
1516 
// call r -- indirect call through a register (FF /2).
void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}
1522 
1523 
// call m -- indirect call through memory (FF /2; rdx encodes the /2 field).
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}
1530 
// Direct rel32 call to an absolute address, with relocation info attached.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  // Displacement is relative to the end of this 5-byte instruction.
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  // Entry is NULL in case of a scratch emit.
  assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1543 
// cdq -- sign-extend eax into edx:eax (opcode 0x99).
void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}
1547 
// cld -- clear the direction flag (opcode 0xFC).
void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}
1551 
// cmovcc r32, r32 -- conditional move; cc is folded into the opcode (0F 40+cc).
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}
1559 
1560 
1561 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1562   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1563   prefix(src, dst);
1564   emit_int8(0x0F);
1565   emit_int8(0x40 | cc);
1566   emit_operand(dst, src);
1567 }
1568 
// cmp m8, imm8 -- 0x80 group, /7 via rdi; trailing 1 = one immediate byte.
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}
1576 
// cmp m32, imm32 -- 0x81 group, /7 via rdi; trailing 4 = four immediate bytes.
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}
1584 
// cmp r32, imm32 -- 0xF8 selects the CMP register form in emit_arith.
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}
1589 
// cmp r32, r32 -- register-register compare (opcode 0x3B).
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
1594 
// cmp r32, m32 -- opcode 0x3B /r.
void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}
1601 
// cmp m16, imm16 -- 0x66 operand-size prefix then 0x81 group, /7 via rdi.
// No prefix(dst) call: the assert guarantees no REX prefix is needed.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}
1610 
1611 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
1612 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1613 // The ZF is set if the compared values were equal, and cleared otherwise.
// cmpxchg m32, r32 -- 0F B1 /r (see the semantics comment above).
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}
1621 
1622 // The 8-bit cmpxchg compares the value at adr with the contents of rax,
1623 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1624 // The ZF is set if the compared values were equal, and cleared otherwise.
// cmpxchg m8, r8 -- 0F B0 /r; byte-register prefix form requested via the
// trailing 'true' argument to prefix().
void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB0);
  emit_operand(reg, adr);
}
1632 
1633 void Assembler::comisd(XMMRegister dst, Address src) {
1634   // NOTE: dbx seems to decode this as comiss even though the
1635   // 0x66 is there. Strangly ucomisd comes out correct
1636   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1637   InstructionMark im(this);
1638   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);;
1639   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1640   attributes.set_rex_vex_w_reverted();
1641   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1642   emit_int8(0x2F);
1643   emit_operand(dst, src);
1644 }
1645 
// comisd xmm, xmm -- ordered scalar double compare, sets EFLAGS (66 0F 2F /r).
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2F);
  emit_int8((unsigned char)(0xC0 | encode));
}
1654 
// comiss xmm, m32 -- ordered scalar single compare, sets EFLAGS (0F 2F /r).
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2F);
  emit_operand(dst, src);
}
1664 
// comiss xmm, xmm -- ordered scalar single compare, sets EFLAGS (0F 2F /r).
void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2F);
  emit_int8((unsigned char)(0xC0 | encode));
}
1672 
// cpuid -- processor identification (0F A2).
void Assembler::cpuid() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA2);
}
1677 
1678 // Opcode / Instruction                      Op /  En  64 - Bit Mode     Compat / Leg Mode Description                  Implemented
1679 // F2 0F 38 F0 / r       CRC32 r32, r / m8   RM        Valid             Valid             Accumulate CRC32 on r / m8.  v
1680 // F2 REX 0F 38 F0 / r   CRC32 r32, r / m8*  RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1681 // F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8   RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1682 //
1683 // F2 0F 38 F1 / r       CRC32 r32, r / m16  RM        Valid             Valid             Accumulate CRC32 on r / m16. v
1684 //
1685 // F2 0F 38 F1 / r       CRC32 r32, r / m32  RM        Valid             Valid             Accumulate CRC32 on r / m32. v
1686 //
1687 // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64  RM        Valid             N.E.              Accumulate CRC32 on r / m64. v
// CRC32 crc, v — accumulate CRC32 over the register operand `v`, whose
// operand size is selected by sizeInBytes (1, 2, 4, or 8 on LP64).
// Encoding: F2 [66|REX.W] 0F 38 F0|F1 /r (see opcode table above).
// Note: the caller must emit the 0x66 operand-size prefix separately for
// the 16-bit form if needed; this routine only distinguishes F0 vs F1.
void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
  assert(VM_Version::supports_sse4_2(), "");
  int8_t w = 0x01;            // low opcode bit: 0 -> r/m8 form (F0), 1 -> r/m16/32/64 form (F1)
  Prefix p = Prefix_EMPTY;

  emit_int8((int8_t)0xF2);    // mandatory F2 prefix
  switch (sizeInBytes) {
  case 1:
    w = 0;                    // byte operand: opcode F0
    break;
  case 2:
  case 4:
    break;                    // word/dword operand: opcode F1, no extra prefix
  LP64_ONLY(case 8:)
    // This instruction is not valid in 32 bits
    // Note:
    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
    //
    // Page B - 72   Vol. 2C says
    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
    //                                                                            F0!!!
    // while 3 - 208 Vol. 2A
    // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.Accumulate CRC32 on r / m64.
    //
    // the 0 on a last bit is reserved for a different flavor of this instruction :
    // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.Accumulate CRC32 on r / m8.
    p = REX_W;                // 64-bit operand needs REX.W
    break;
  default:
    assert(0, "Unsupported value for a sizeInBytes argument");
    break;
  }
  LP64_ONLY(prefix(crc, v, p);)  // REX prefix (extended regs and/or REX.W), 64-bit only
  emit_int8((int8_t)0x0F);
  emit_int8(0x38);
  emit_int8((int8_t)(0xF0 | w));
  // ModRM byte, register-direct: mod=11, reg=crc, r/m=v
  emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
}
1727 
// CRC32 crc, [adr] — memory-operand form of CRC32; operand size is
// selected by sizeInBytes exactly as in the register form above.
void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  int8_t w = 0x01;            // low opcode bit: 0 -> m8 form (F0), 1 -> m16/32/64 form (F1)
  Prefix p = Prefix_EMPTY;

  emit_int8((int8_t)0xF2);    // mandatory F2 prefix
  switch (sizeInBytes) {
  case 1:
    w = 0;                    // byte operand: opcode F0
    break;
  case 2:
  case 4:
    break;                    // word/dword operand: opcode F1
  LP64_ONLY(case 8:)
    // This instruction is not valid in 32 bits
    p = REX_W;                // 64-bit operand needs REX.W
    break;
  default:
    assert(0, "Unsupported value for a sizeInBytes argument");
    break;
  }
  LP64_ONLY(prefix(crc, adr, p);)  // REX prefix for address/reg extensions, 64-bit only
  emit_int8((int8_t)0x0F);
  emit_int8(0x38);
  emit_int8((int8_t)(0xF0 | w));
  emit_operand(crc, adr);     // ModRM + SIB + displacement for the memory operand
}
1756 
// CVTDQ2PD (F3 0F E6 /r): convert packed dword integers to packed doubles.
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE6);
  emit_int8((unsigned char)(0xC0 | encode));
}
1764 
// CVTDQ2PS (NP 0F 5B /r): convert packed dword integers to packed singles.
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5B);
  emit_int8((unsigned char)(0xC0 | encode));
}
1772 
// CVTSD2SS (F2 0F 5A /r): convert scalar double to scalar single.
// On EVEX-capable CPUs W is set (and marked revertible for VEX fallback).
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5A);
  emit_int8((unsigned char)(0xC0 | encode));
}
1781 
// CVTSD2SS, memory-source form: converts a 64-bit double loaded from src.
void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // EVEX disp8 compression: tuple T1S with a 64-bit memory input
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5A);
  emit_operand(dst, src);
}
1792 
// CVTSI2SD (F2 0F 2A /r) with a 32-bit GP source register: convert
// signed dword integer to scalar double.
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // GP register is encoded through the XMM path; only the encoding number matters
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}
1800 
// CVTSI2SD, memory-source form: converts a signed dword loaded from src.
void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_operand(dst, src);
}
1810 
// CVTSI2SS (F3 0F 2A /r) with a 32-bit GP source register: convert
// signed dword integer to scalar single.
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}
1818 
// CVTSI2SS, memory-source form: converts a signed dword loaded from src.
void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_operand(dst, src);
}
1828 
// CVTSI2SS with a 64-bit GP source (REX.W/VEX.W set): convert signed
// qword integer to scalar single.
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}
1836 
// CVTSS2SD (F3 0F 5A /r): convert scalar single to scalar double.
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5A);
  emit_int8((unsigned char)(0xC0 | encode));
}
1844 
// CVTSS2SD, memory-source form: converts a 32-bit single loaded from src.
void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5A);
  emit_operand(dst, src);
}
1854 
1855 
// CVTTSD2SI (F2 0F 2C /r): truncating convert scalar double to a 32-bit
// signed integer in a GP register.
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // GP destination encoded through the XMM path; only the encoding number matters
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}
1863 
// CVTTSS2SI (F3 0F 2C /r): truncating convert scalar single to a 32-bit
// signed integer in a GP register.
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}
1871 
// CVTTPD2DQ (66 0F E6 /r): truncating convert packed doubles to packed
// dword integers. On AVX-512 parts without VL support the 512-bit form
// must be used, hence the vector_len selection.
void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE6);
  emit_int8((unsigned char)(0xC0 | encode));
}
1880 
// VCVTPS2PD (VEX NP 0F 5A /r): convert packed singles to packed doubles,
// 128- or 256-bit VEX-encoded form (AVX1+ only; EVEX form is below).
void Assembler::vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x5A);
  emit_int8((unsigned char)(0xC0 | encode));

}
1889 
// EVEX-encoded CVTPS2PD: AVX-512 form (any vector_len, uses_vl set).
void Assembler::evcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();   // force EVEX rather than VEX encoding
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x5A);
  emit_int8((unsigned char)(0xC0 | encode));
}
1898 
// PABSB (66 0F 38 1C /r): packed absolute value of signed bytes (SSSE3).
void Assembler::pabsb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x1C);
  emit_int8((unsigned char)(0xC0 | encode));
}
1906 
// PABSW (66 0F 38 1D /r): packed absolute value of signed words (SSSE3).
void Assembler::pabsw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x1D);
  emit_int8((unsigned char)(0xC0 | encode));
}
1914 
// PABSD (66 0F 38 1E /r): packed absolute value of signed dwords (SSSE3).
void Assembler::pabsd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x1E);
  emit_int8((unsigned char)(0xC0 | encode));
}
1922 
// VPABSB: VEX-encoded packed byte absolute value, 128/256-bit AVX form.
void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x1C);
  emit_int8((unsigned char)(0xC0 | encode));
}
1930 
// VPABSW: VEX-encoded packed word absolute value, 128/256-bit AVX form.
void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x1D);
  emit_int8((unsigned char)(0xC0 | encode));
}
1938 
// VPABSD: VEX-encoded packed dword absolute value, 128/256-bit AVX form.
void Assembler::vpabsd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x1E);
  emit_int8((unsigned char)(0xC0 | encode));
}
1946 
// EVEX-encoded VPABSB (AVX-512 form).
void Assembler::evpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_is_evex_instruction();   // force EVEX rather than VEX encoding
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x1C);
  emit_int8((unsigned char)(0xC0 | encode));
}
1955 
// EVEX-encoded VPABSW (AVX-512 form).
void Assembler::evpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_is_evex_instruction();   // force EVEX rather than VEX encoding
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x1D);
  emit_int8((unsigned char)(0xC0 | encode));
}
1964 
// EVEX-encoded VPABSD (AVX-512 form).
void Assembler::evpabsd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_is_evex_instruction();   // force EVEX rather than VEX encoding
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x1E);
  emit_int8((unsigned char)(0xC0 | encode));
}
1973 
// EVEX-encoded VPABSQ: packed qword absolute value (AVX-512 only,
// hence EVEX.W=1 and no VEX counterpart).
void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x1F);
  emit_int8((unsigned char)(0xC0 | encode));
}
1982 
// DEC m32 (FF /1): decrement a 32-bit memory operand.
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rcx, dst);   // rcx encodes /1 in the ModRM reg field
}
1990 
// DIVSD (F2 0F 5E /r), memory-source form: dst /= [src] as scalar double.
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}
2001 
// DIVSD (F2 0F 5E /r), register form: dst /= src as scalar double.
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
2010 
// DIVSS (F3 0F 5E /r), memory-source form: dst /= [src] as scalar single.
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}
2020 
// DIVSS (F3 0F 5E /r), register form: dst /= src as scalar single.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
2028 
// EMMS (0F 77): empty MMX technology state.
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_int8(0x0F);
  emit_int8(0x77);
}
2034 
// HLT (F4): halt the processor.
void Assembler::hlt() {
  emit_int8((unsigned char)0xF4);
}
2038 
// IDIV r32 (F7 /7): signed divide edx:eax by src.
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF8 | encode));  // ModRM: mod=11, reg=/7
}
2044 
// DIV r32 (F7 /6): unsigned divide edx:eax by src.
void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF0 | encode));  // ModRM: mod=11, reg=/6
}
2050 
// IMUL r32 (F7 /5): one-operand signed multiply of eax by src.
void Assembler::imull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xE8 | encode));  // ModRM: mod=11, reg=/5
}
2056 
// IMUL r32, r32 (0F AF /r): two-operand signed multiply, dst *= src.
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
  emit_int8((unsigned char)(0xC0 | encode));
}
2063 
2064 
// IMUL r32, r32, imm — dst = src * value. Uses the short imm8 form
// (6B /r ib) when the immediate fits in a signed byte, otherwise the
// imm32 form (69 /r id).
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_int8(0x6B);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int8(value & 0xFF);
  } else {
    emit_int8(0x69);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int32(value);
  }
}
2077 
// IMUL r32, m32 (0F AF /r): dst *= [src].
void Assembler::imull(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char) 0xAF);
  emit_operand(dst, src);
}
2085 
2086 
// INC m32 (FF /0): increment a 32-bit memory operand.
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rax, dst);   // rax encodes /0 in the ModRM reg field
}
2094 
// Conditional jump to label L. For a bound label, picks the 2-byte short
// form (70+cc disp8) when maybe_short is set and the displacement fits in
// 8 bits, otherwise the 6-byte near form (0F 80+cc disp32). Displacements
// are relative to the end of the instruction, hence the short_size /
// long_size corrections. For an unbound label, records a patch site and
// emits the 32-bit form with a zero placeholder displacement.
void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)pc();
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_int8(0x70 | cc);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_int8(0x0F);
      emit_int8((unsigned char)(0x80 | cc));
      emit_int32(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    //       is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    //       an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_int8(0x0F);
    emit_int8((unsigned char)(0x80 | cc));
    emit_int32(0);
  }
}
2128 
2129 void Assembler::jccb(Condition cc, Label& L) {
2130   if (L.is_bound()) {
2131     const int short_size = 2;
2132     address entry = target(L);
2133 #ifdef ASSERT
2134     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2135     intptr_t delta = short_branch_delta();
2136     if (delta != 0) {
2137       dist += (dist < 0 ? (-delta) :delta);
2138     }
2139     assert(is8bit(dist), "Dispacement too large for a short jmp");
2140 #endif
2141     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2142     // 0111 tttn #8-bit disp
2143     emit_int8(0x70 | cc);
2144     emit_int8((offs - short_size) & 0xFF);
2145   } else {
2146     InstructionMark im(this);
2147     L.add_patch_at(code(), locator());
2148     emit_int8(0x70 | cc);
2149     emit_int8(0);
2150   }
2151 }
2152 
// JMP m (FF /4): indirect jump through a memory operand.
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsp, adr);   // rsp encodes /4 in the ModRM reg field
}
2159 
// Unconditional jump to label L. For a bound label, picks the 2-byte
// short form (EB disp8) when maybe_short is set and the displacement
// fits, otherwise the 5-byte near form (E9 disp32). For an unbound
// label, records a patch site and emits the 32-bit form.
void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - pc();
    if (maybe_short && is8bit(offs - short_size)) {
      emit_int8((unsigned char)0xEB);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      emit_int8((unsigned char)0xE9);
      emit_int32(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xE9);
    emit_int32(0);
  }
}
2186 
// JMP r (FF /4): indirect jump through a register.
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xE0 | encode));  // ModRM: mod=11, reg=/4
}
2192 
// JMP rel32 (E9) to an absolute destination, with relocation info
// attached so the displacement can be fixed up if the code moves.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (pc() + sizeof(int32_t));  // relative to end of instruction
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}
2201 
2202 void Assembler::jmpb(Label& L) {
2203   if (L.is_bound()) {
2204     const int short_size = 2;
2205     address entry = target(L);
2206     assert(entry != NULL, "jmp most probably wrong");
2207 #ifdef ASSERT
2208     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2209     intptr_t delta = short_branch_delta();
2210     if (delta != 0) {
2211       dist += (dist < 0 ? (-delta) :delta);
2212     }
2213     assert(is8bit(dist), "Dispacement too large for a short jmp");
2214 #endif
2215     intptr_t offs = entry - pc();
2216     emit_int8((unsigned char)0xEB);
2217     emit_int8((offs - short_size) & 0xFF);
2218   } else {
2219     InstructionMark im(this);
2220     L.add_patch_at(code(), locator());
2221     emit_int8((unsigned char)0xEB);
2222     emit_int8(0);
2223   }
2224 }
2225 
// LDMXCSR m32 (0F AE /2): load the MXCSR control/status register.
// Uses the VEX-encoded form when AVX is enabled, the legacy SSE
// encoding otherwise.
void Assembler::ldmxcsr( Address src) {
  if (UseAVX > 0 ) {
    InstructionMark im(this);
    InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
    vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
    emit_int8((unsigned char)0xAE);
    emit_operand(as_Register(2), src);   // register 2 encodes /2 in ModRM reg field
  } else {
    NOT_LP64(assert(VM_Version::supports_sse(), ""));
    InstructionMark im(this);
    prefix(src);
    emit_int8(0x0F);
    emit_int8((unsigned char)0xAE);
    emit_operand(as_Register(2), src);   // register 2 encodes /2 in ModRM reg field
  }
}
2242 
// LEA r32, m (8D /r), 32-bit form. On 64-bit VMs an address-size
// override (0x67) is emitted so the effective address is computed
// with 32-bit arithmetic.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_int8(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_int8((unsigned char)0x8D);
  emit_operand(dst, src);
}
2252 
// LFENCE (0F AE E8): load fence.
void Assembler::lfence() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_int8((unsigned char)0xE8);
}
2258 
// LOCK prefix (F0) for the following instruction.
void Assembler::lock() {
  emit_int8((unsigned char)0xF0);
}
2262 
// LZCNT r32, r32 (F3 0F BD /r): count leading zeros. On CPUs without
// LZCNT support the F3 prefix is ignored and the bytes decode as BSR,
// which gives different results — hence the assert.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_int8((unsigned char)0xF3);   // F3 must precede any REX prefix
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}
2271 
2272 // Emit mfence instruction
// Emit mfence instruction (0F AE F0): full memory fence.
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_int8((unsigned char)0xF0);
}
2279 
// Pointer-sized register move: movq on 64-bit VMs, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
2283 
// MOVAPD (66 0F 28 /r): move aligned packed doubles, register form.
// On AVX-512 parts without VL support the 512-bit form must be used.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x28);
  emit_int8((unsigned char)(0xC0 | encode));
}
2293 
// MOVAPS (NP 0F 28 /r): move aligned packed singles, register form.
// On AVX-512 parts without VL support the 512-bit form must be used.
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x28);
  emit_int8((unsigned char)(0xC0 | encode));
}
2302 
// MOVLHPS (NP 0F 16 /r): move the low quadword of src into the high
// quadword of dst. Note src doubles as the nds operand in the VEX form.
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x16);
  emit_int8((unsigned char)(0xC0 | encode));
}
2310 
// MOV r8, m8 (8A /r): load a byte from memory into a byte register.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);   // byte-register prefix handling
  emit_int8((unsigned char)0x8A);
  emit_operand(dst, src);
}
2318 
2319 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2320   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2321   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2322   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2323   attributes.set_rex_vex_w_reverted();
2324   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2325   emit_int8(0x12);
2326   emit_int8(0xC0 | encode);
2327 }
2328 
// KMOVB k, r32 (VEX 66 0F 92 /r): move byte mask from GP register to
// opmask register (AVX-512DQ).
void Assembler::kmovbl(KRegister dst, Register src) {
  assert(VM_Version::supports_avx512dq(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x92);
  emit_int8((unsigned char)(0xC0 | encode));
}
2336 
// KMOVB r32, k (VEX 66 0F 93 /r): move byte mask from opmask register
// to GP register (AVX-512DQ).
void Assembler::kmovbl(Register dst, KRegister src) {
  assert(VM_Version::supports_avx512dq(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x93);
  emit_int8((unsigned char)(0xC0 | encode));
}
2344 
// KMOVW k, r32 (VEX NP 0F 92 /r): move word mask from GP register to
// opmask register (AVX-512F).
void Assembler::kmovwl(KRegister dst, Register src) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x92);
  emit_int8((unsigned char)(0xC0 | encode));
}
2352 
// KMOVW r32, k (VEX NP 0F 93 /r): move word mask from opmask register
// to GP register (AVX-512F).
void Assembler::kmovwl(Register dst, KRegister src) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x93);
  emit_int8((unsigned char)(0xC0 | encode));
}
2360 
// KMOVW k, m16 (VEX NP 0F 90 /r): load a word mask from memory.
void Assembler::kmovwl(KRegister dst, Address src) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x90);
  emit_operand((Register)dst, src);   // cast: opmask reg number reuses GP encoding path
}
2369 
// KMOVD k, r32 (VEX F2 0F 92 /r): move dword mask from GP register to
// opmask register (AVX-512BW).
void Assembler::kmovdl(KRegister dst, Register src) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x92);
  emit_int8((unsigned char)(0xC0 | encode));
}
2377 
// KMOVD r32, k (VEX F2 0F 93 /r): move dword mask from opmask register
// to GP register (AVX-512BW).
void Assembler::kmovdl(Register dst, KRegister src) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x93);
  emit_int8((unsigned char)(0xC0 | encode));
}
2385 
// KMOVQ k, k (VEX.W NP 0F 90 /r): move qword mask between opmask
// registers (AVX-512BW).
void Assembler::kmovql(KRegister dst, KRegister src) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x90);
  emit_int8((unsigned char)(0xC0 | encode));
}
2393 
void Assembler::kmovql(KRegister dst, Address src) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionMark im(this);
  // kmovq: 64-bit mask load, m64 -> k. Opcode 0x90 is the load direction.
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x90);
  emit_operand((Register)dst, src);
}
2402 
2403 void Assembler::kmovql(Address dst, KRegister src) {
2404   assert(VM_Version::supports_avx512bw(), "");
2405   InstructionMark im(this);
2406   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2407   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2408   emit_int8((unsigned char)0x90);
2409   emit_operand((Register)src, dst);
2410 }
2411 
void Assembler::kmovql(KRegister dst, Register src) {
  assert(VM_Version::supports_avx512bw(), "");
  // kmovq: r64 -> 64-bit mask k. F2 prefix + W1, opcode 0x92 (GPR-to-mask direction).
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x92);
  emit_int8((unsigned char)(0xC0 | encode));
}
2419 
void Assembler::kmovql(Register dst, KRegister src) {
  assert(VM_Version::supports_avx512bw(), "");
  // kmovq: 64-bit mask k -> r64. F2 prefix + W1, opcode 0x93 (mask-to-GPR direction).
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x93);
  emit_int8((unsigned char)(0xC0 | encode));
}
2427 
void Assembler::knotwl(KRegister dst, KRegister src) {
  assert(VM_Version::supports_evex(), "");
  // knotw: bitwise NOT of a 16-bit mask, k -> k. Opcode 0x44.
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x44);
  emit_int8((unsigned char)(0xC0 | encode));
}
2435 
// This instruction produces ZF or CF flags
void Assembler::kortestbl(KRegister src1, KRegister src2) {
  assert(VM_Version::supports_avx512dq(), "");
  // kortestb: OR two 8-bit masks and set ZF/CF; result is discarded. 66 prefix selects
  // the byte form; opcode 0x98.
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x98);
  emit_int8((unsigned char)(0xC0 | encode));
}
2444 
// This instruction produces ZF or CF flags
void Assembler::kortestwl(KRegister src1, KRegister src2) {
  assert(VM_Version::supports_evex(), "");
  // kortestw: OR two 16-bit masks and set ZF/CF. No SIMD prefix selects the word form;
  // opcode 0x98.
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x98);
  emit_int8((unsigned char)(0xC0 | encode));
}
2453 
// This instruction produces ZF or CF flags
void Assembler::kortestdl(KRegister src1, KRegister src2) {
  assert(VM_Version::supports_avx512bw(), "");
  // kortestd: OR two 32-bit masks and set ZF/CF. 66 prefix + W1 selects the dword form;
  // opcode 0x98.
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x98);
  emit_int8((unsigned char)(0xC0 | encode));
}
2462 
// This instruction produces ZF or CF flags
void Assembler::kortestql(KRegister src1, KRegister src2) {
  assert(VM_Version::supports_avx512bw(), "");
  // kortestq: OR two 64-bit masks and set ZF/CF. No SIMD prefix + W1 selects the qword
  // form; opcode 0x98.
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x98);
  emit_int8((unsigned char)(0xC0 | encode));
}
2471 
// This instruction produces ZF or CF flags
void Assembler::ktestql(KRegister src1, KRegister src2) {
  assert(VM_Version::supports_avx512bw(), "");
  // ktestq: AND/ANDN test of two 64-bit masks, setting ZF/CF. Opcode 0x99, W1.
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x99);
  emit_int8((unsigned char)(0xC0 | encode));
}
2480 
// Identical encoding to ktestql above; kept as a separate entry point for callers
// using the unsuffixed name.
void Assembler::ktestq(KRegister src1, KRegister src2) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x99);
  emit_int8((unsigned char)(0xC0 | encode));
}
2488 
void Assembler::ktestd(KRegister src1, KRegister src2) {
  assert(VM_Version::supports_avx512bw(), "");
  // ktestd: test of two 32-bit masks, setting ZF/CF. 66 prefix + W1, opcode 0x99.
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x99);
  emit_int8((unsigned char)(0xC0 | encode));
}
2496 
// mov m8, imm8: opcode 0xC6 /0 (rax encodes the /0 extension), immediate follows.
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
   prefix(dst);
  emit_int8((unsigned char)0xC6);
  // Trailing argument 1 = size of the immediate still to be emitted, so RIP-relative
  // displacements are computed correctly.
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}
2504 
2505 
// mov m8, r8: opcode 0x88 /r. The byte-register prefix variant is requested (third
// argument true) since only some registers are byte-addressable without REX.
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_int8((unsigned char)0x88);
  emit_operand(src, dst);
}
2513 
// movd: r32 -> xmm. 66 0F 6E /r.
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // The GPR is viewed as an XMM register number purely for encoding purposes.
  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6E);
  emit_int8((unsigned char)(0xC0 | encode));
}
2521 
// movd: xmm -> r32. 66 0F 7E /r.
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_int8((unsigned char)(0xC0 | encode));
}
2530 
// movd: m32 -> xmm. 66 0F 6E /r; EVEX tuple T1S with 32-bit input for disp8*N scaling.
void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6E);
  emit_operand(dst, src);
}
2540 
// movd: xmm -> m32 (store). 66 0F 7E /r.
void Assembler::movdl(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_operand(src, dst);
}
2550 
// movdqa: aligned 128-bit move, xmm -> xmm. 66 0F 6F /r.
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // On AVX-512 parts without VL support only 512-bit EVEX forms exist, so widen the
  // declared vector length rather than emit an unsupported 128-bit EVEX encoding.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2559 
// movdqa: aligned 128-bit load, m128 -> xmm. 66 0F 6F /r.
void Assembler::movdqa(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2569 
// movdqu: unaligned 128-bit load, m128 -> xmm. F3 0F 6F /r.
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2579 
// movdqu: unaligned 128-bit move, xmm -> xmm. F3 0F 6F /r.
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2587 
// movdqu: unaligned 128-bit store, xmm -> m128. F3 0F 7F /r.
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  // A store never zeroes untouched destination lanes, so drop the clear-context bit.
  attributes.reset_is_clear_context();
  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2598 
// Move Unaligned 256bit Vector
// vmovdqu: ymm -> ymm. VEX.256.F3.0F 6F /r.
void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(UseAVX > 0, "");
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2607 
// vmovdqu: 256-bit load, m256 -> ymm. VEX.256.F3.0F 6F /r.
void Assembler::vmovdqu(XMMRegister dst, Address src) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2617 
// vmovdqu: 256-bit store, ymm -> m256. VEX.256.F3.0F 7F /r.
void Assembler::vmovdqu(Address dst, XMMRegister src) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  // Stores do not zero destination memory context; clear-context does not apply.
  attributes.reset_is_clear_context();
  // swap src<->dst for encoding
  assert(src != xnoreg, "sanity");
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2630 
// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
// evmovdqu8: byte-granular unaligned move, xmm -> xmm. Falls back to the F3-prefixed
// dword form (vmovdqu32) when AVX512BW is not available (_legacy_mode_bw).
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  if (merge) {
    // merge semantics: preserve unwritten destination lanes instead of zeroing.
    attributes.reset_is_clear_context();
  }
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2644 
// evmovdqu8: byte-granular unaligned load, memory -> xmm/ymm/zmm. Opcode 0x6F.
void Assembler::evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // NOTE(review): the prefix pick (F2 for bw, F3 for the dword fallback) mirrors the
  // register-register variant above.
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2659 
// evmovdqu8: byte-granular unaligned store, xmm/ymm/zmm -> memory. Opcode 0x7F.
void Assembler::evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2675 
// evmovdqu8 with an explicit opmask: masked byte-granular load. Requires AVX512VL+BW,
// so the F2 (byte) prefix can be used unconditionally — no dword fallback is needed.
void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2690 
// evmovdqu16: word-granular unaligned load (vex_w=true distinguishes it from the byte
// form). Falls back to the F3-prefixed dword form when AVX512BW is absent.
void Assembler::evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2705 
// evmovdqu16 with an explicit opmask: masked word-granular load. Requires AVX512VL+BW,
// so the F2 prefix is used unconditionally.
void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2720 
// evmovdqu16: word-granular unaligned store. Opcode 0x7F; dword fallback as above.
void Assembler::evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2736 
// evmovdqu16 with an explicit opmask: masked word-granular store.
void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  // NOTE(review): siblings pass _legacy_mode_bw here; `false` is equivalent under the
  // vlbw assert above (bw implies non-legacy), but the inconsistency is worth confirming.
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2752 
// Convenience wrapper: evmovdqu32 xmm <- xmm with the k1 mask and zeroing semantics.
void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
  // Users of this routine assume k1 usage.
  evmovdqul(dst, k1, src, /*merge*/ false, vector_len);
}
2757 
// evmovdqu32: dword-granular unaligned move with opmask, xmm -> xmm. F3 prefix, W0,
// opcode 0x6F.
void Assembler::evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    // merge semantics: preserve masked-off destination lanes instead of zeroing.
    attributes.reset_is_clear_context();
  }
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2770 
// Convenience wrapper: evmovdqu32 load with the k1 mask and zeroing semantics.
void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
  // Users of this routine assume k1 usage.
  evmovdqul(dst, k1, src, /*merge*/ false, vector_len);
}
2775 
// evmovdqu32: dword-granular unaligned load with opmask, memory -> xmm/ymm/zmm.
void Assembler::evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2790 
// Convenience wrapper: evmovdqu32 store with the k1 mask. Stores use merge semantics
// (merge=true) since masked-off memory must not be modified.
void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
  // Users of this routine assume k1 usage.
  evmovdqul(dst, k1, src, /*merge*/ true, vector_len);
}
2795 
// evmovdqu32: dword-granular unaligned store with opmask, xmm/ymm/zmm -> memory.
// Opcode 0x7F.
void Assembler::evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2811 
// Convenience wrapper: evmovdqu64 xmm <- xmm with the k1 mask and zeroing semantics.
void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
  // Users of this routine assume k1 usage.
  evmovdquq(dst, k1, src, /*merge*/ false, vector_len);
}
2816 
// evmovdqu64: qword-granular unaligned move with opmask, xmm -> xmm. F3 prefix, W1
// (vex_w=true) distinguishes it from the dword form; opcode 0x6F.
void Assembler::evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2829 
// Convenience wrapper: evmovdqu64 load with the k1 mask and zeroing semantics.
void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
  // Users of this routine assume k1 usage.
  evmovdquq(dst, k1, src, /*merge*/ false, vector_len);
}
2834 
// evmovdqu64: qword-granular unaligned load with opmask, memory -> xmm/ymm/zmm.
void Assembler::evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2849 
// Convenience wrapper: evmovdqu64 store with the k1 mask. Stores use merge semantics
// (merge=true) since masked-off memory must not be modified.
void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
  // Users of this routine assume k1 usage.
  evmovdquq(dst, k1, src, /*merge*/ true, vector_len);
}
2854 
// evmovdqu64: qword-granular unaligned store with opmask, xmm/ymm/zmm -> memory.
// Opcode 0x7F.
void Assembler::evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attributes.reset_is_clear_context();
  }
  // NOTE(review): set_is_evex_instruction() is called after the merge check here,
  // before it in the sibling routines; both orders only set attribute flags.
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2870 
// Uses zero extension on 64bit

// mov r32, imm32: short form 0xB8+rd with the register folded into the opcode byte.
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_int32(imm32);
}
2878 
// mov r32, r/m32: opcode 0x8B /r, register-register form.
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x8B);
  emit_int8((unsigned char)(0xC0 | encode));
}
2884 
// mov r32, m32: opcode 0x8B /r, memory form.
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}
2891 
// mov m32, imm32: opcode 0xC7 /0 (rax encodes the /0 extension).
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  // Trailing 4 = bytes of immediate still to come, for RIP-relative displacement math.
  emit_operand(rax, dst, 4);
  emit_int32(imm32);
}
2899 
// mov m32, r32: opcode 0x89 /r, store form.
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}
2906 
// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
// movlpd: load 64 bits into the low half of an xmm register. 66 0F 12 /r.
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  // dst doubles as the vvvv source so the upper half is taken from dst itself.
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x12);
  emit_operand(dst, src);
}
2920 
// movq (MMX): m64 -> mm. 0F 6F /r; no prefix bytes are needed for MMX registers here.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_int8(0x0F);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2927 
// movq (MMX): mm -> m64 (store). 0F 7F /r.
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_int8(0x0F);
  emit_int8(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}
2940 
// movq: 64-bit load into xmm (upper bits zeroed). F3 0F 7E /r.
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_operand(dst, src);
}
2951 
// movq: 64-bit store from the low half of xmm. 66 0F D6 /r.
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD6);
  emit_operand(src, dst);
}
2962 
// movq: xmm low 64 bits -> r64. 66 (REX.W) 0F 7E /r.
void Assembler::movq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_int8((unsigned char)(0xC0 | encode));
}
2971 
// movq: r64 -> xmm. 66 (REX.W) 0F 6E /r.
void Assembler::movq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6E);
  emit_int8((unsigned char)(0xC0 | encode));
}
2979 
// movsx r32, m8: sign-extending byte load. 0F BE /r.
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_operand(dst, src);
}
2987 
// movsx r32, r8: sign-extending byte-register move. 0F BE /r; the source must be
// byte-addressable (the final true requests the byte-register prefix handling).
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_int8((unsigned char)(0xC0 | encode));
}
2995 
// movsd: scalar double move, xmm -> xmm (low 64 bits; upper bits from dst via vvvv).
// F2 0F 10 /r.
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_int8((unsigned char)(0xC0 | encode));
}
3004 
// movsd: scalar double load, m64 -> xmm. F2 0F 10 /r.
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_operand(dst, src);
}
3015 
// movsd: scalar double store, xmm -> m64. F2 0F 11 /r.
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  // Stores never zero destination context.
  attributes.reset_is_clear_context();
  attributes.set_rex_vex_w_reverted();
  simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x11);
  emit_operand(src, dst);
}
3027 
// MOVSS xmm, xmm (F3 0F 10): move scalar single from src to dst.
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_int8((unsigned char)(0xC0 | encode));
}
3035 
// MOVSS xmm, m32 (F3 0F 10): load scalar single from memory into dst.
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // EVEX disp8 compression: tuple T1S with a 32-bit element.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_operand(dst, src);
}
3045 
// MOVSS m32, xmm (F3 0F 11): store scalar single from src to memory.
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  // Store form: merge semantics, not zeroing.
  attributes.reset_is_clear_context();
  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x11);
  emit_operand(src, dst);
}
3056 
// MOVSX (0x0F 0xBF): sign-extend the 16-bit word at src into 32-bit dst.
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_operand(dst, src);
}
3064 
// MOVSX (0x0F 0xBF): sign-extend the low 16 bits of src into 32-bit dst.
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_int8((unsigned char)(0xC0 | encode));
}
3071 
// MOV m16, imm16 (66 C7 /0): store a 16-bit immediate to memory.
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_int8(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  // rax (encoding 0) supplies the /0 opcode extension in the ModRM reg field;
  // the trailing 2 accounts for the imm16 that follows the operand bytes.
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}
3081 
// MOV r16, m16 (66 8B): load a 16-bit word from memory into dst.
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_int8(0x66); // operand-size prefix: 16-bit operation
  prefix(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}
3089 
// MOV m16, r16 (66 89): store the low 16 bits of src to memory.
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_int8(0x66); // operand-size prefix: 16-bit operation
  prefix(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}
3097 
// MOVZX (0x0F 0xB6): zero-extend the byte at src into 32-bit dst.
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB6);
  emit_operand(dst, src);
}
3105 
3106 void Assembler::movzbl(Register dst, Register src) { // movzxb
3107   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
3108   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
3109   emit_int8(0x0F);
3110   emit_int8((unsigned char)0xB6);
3111   emit_int8(0xC0 | encode);
3112 }
3113 
// MOVZX (0x0F 0xB7): zero-extend the 16-bit word at src into 32-bit dst.
void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB7);
  emit_operand(dst, src);
}
3121 
3122 void Assembler::movzwl(Register dst, Register src) { // movzxw
3123   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3124   emit_int8(0x0F);
3125   emit_int8((unsigned char)0xB7);
3126   emit_int8(0xC0 | encode);
3127 }
3128 
// MUL m32 (F7 /4): unsigned multiply EAX by the 32-bit memory operand.
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_int8((unsigned char)0xF7);
  // rsp (encoding 4) supplies the /4 opcode extension in the ModRM reg field.
  emit_operand(rsp, src);
}
3135 
// MUL r32 (F7 /4): unsigned multiply EAX by src.
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xE0 | encode)); // 0xE0 = mod 11, reg /4
}
3141 
// MULSD xmm, m64 (F2 0F 59): dst *= scalar double at src.
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
3152 
// MULSD xmm, xmm (F2 0F 59): dst *= src (scalar double).
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
3161 
// MULSS xmm, m32 (F3 0F 59): dst *= scalar single at src.
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
3171 
// MULSS xmm, xmm (F3 0F 59): dst *= src (scalar single).
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
3179 
// NEG r32 (F7 /3): two's-complement negate dst.
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD8 | encode)); // 0xD8 = mod 11, reg /3
}
3185 
// Emit exactly 'i' bytes of nop padding.
// The encoding strategy is CPU-specific: with UseAddressNop on Intel, AMD
// or ZX, long multi-byte "address nops" (0x0F 0x1F [address]) are preferred;
// otherwise classic 0x90 nops with 0x66 size prefixes are used. Under
// ASSERT builds only single-byte 0x90 nops are emitted so debuggers can
// disassemble the padding. NOTE: the switch statements below rely on
// intentional case fall-through to accumulate 0x66 size prefixes.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    // Emit 15-byte chunks: an 11-byte address nop followed by a 4-byte
    // prefixed regular nop, so address nops never appear back to back.
    while(i >= 15) {
      // For Intel don't generate consecutive addess nops (mix with regular nops)
      i -= 15;
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      addr_nop_8();
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8((unsigned char)0x90);
                         // nop
    }
    // Remaining 0-14 bytes; cases fall through to add size prefixes.
    switch (i) {
      case 14:
        emit_int8(0x66); // size prefix
      case 13:
        emit_int8(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    // AMD tolerates consecutive address nops, so emit 11-byte chunks
    // until at most 21 bytes remain.
    while(i >= 22) {
      i -= 11;
      emit_int8(0x66); // size prefix
      emit_int8(0x66); // size prefix
      emit_int8(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    switch (i) {
      case 21:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_int8(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  if (UseAddressNop && VM_Version::is_zx()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for ZX
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is ZX specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    // Same scheme as the Intel path above: 15-byte chunks mixing an
    // address nop with a prefixed regular nop.
    while (i >= 15) {
      // For ZX don't generate consecutive addess nops (mix with regular nops)
      i -= 15;
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      addr_nop_8();
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8((unsigned char)0x90);
                         // nop
    }
    switch (i) {
      case 14:
        emit_int8(0x66); // size prefix
      case 13:
        emit_int8(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_int8(0x66); // size prefix
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
                     // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
  }
  switch (i) {
    case 4:
      emit_int8(0x66);
    case 3:
      emit_int8(0x66);
    case 2:
      emit_int8(0x66);
    case 1:
      emit_int8((unsigned char)0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
3521 
// NOT r32 (F7 /2): one's-complement negate dst.
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD0 | encode)); // 0xD0 = mod 11, reg /2
}
3527 
// OR m32, imm32 (81 /1): or an immediate into the memory operand.
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  // rcx (encoding 1) supplies the /1 opcode extension for OR.
  emit_arith_operand(0x81, rcx, dst, imm32);
}
3533 
// OR r32, imm32 (81 /1): or an immediate into dst.
void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  // emit_arith handles the short imm8 form (83 /1) where applicable.
  emit_arith(0x81, 0xC8, dst, imm32);
}
3538 
// OR r32, m32 (0B): dst |= memory operand.
void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0B);
  emit_operand(dst, src);
}
3545 
// OR r32, r32 (0B): dst |= src.
void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
3550 
// OR m32, r32 (09): memory operand |= src.
void Assembler::orl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x09);
  emit_operand(src, dst);
}
3557 
// PACKUSWB xmm, m128 (66 0F 67): pack words into bytes with unsigned saturation.
void Assembler::packuswb(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // Legacy SSE form requires a 16-byte-aligned memory operand; only assert
  // alignment safety when AVX (unaligned-tolerant) is in use.
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x67);
  emit_operand(dst, src);
}
3568 
// PACKUSWB xmm, xmm (66 0F 67): pack words into bytes with unsigned saturation.
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x67);
  emit_int8((unsigned char)(0xC0 | encode));
}
3576 
// VPACKUSWB (VEX/EVEX 66 0F 67): vector pack words to bytes, unsigned saturation.
void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "some form of AVX must be enabled");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x67);
  emit_int8((unsigned char)(0xC0 | encode));
}
3584 
// VPACKUSDW (66 0F38 2B): vector pack dwords to words, unsigned saturation.
void Assembler::vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "some form of AVX must be enabled");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x2B);
  emit_int8((unsigned char)(0xC0 | encode));
}
3592 
3593 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3594   assert(VM_Version::supports_avx2(), "");
3595   // VEX.256.66.0F3A.W1 00 /r ib
3596   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3597   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3598   emit_int8(0x00);
3599   emit_int8(0xC0 | encode);
3600   emit_int8(imm8);
3601 }
3602 
3603 void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3604   assert(VM_Version::supports_avx2(), "");
3605   // VEX.NDS.256.66.0F38.W0 36 /r
3606   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3607   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3608   emit_int8(0x36);
3609   emit_int8(0xC0 | encode);
3610 }
3611 
// VPERMD ymm, ymm, m256: permute dwords loaded from memory using indices in nds.
void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx2(), "");
  // VEX.NDS.256.66.0F38.W0 36 /r
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x36);
  emit_operand(dst, src);
}
3621 
3622 void Assembler::vperm2i128(XMMRegister dst,  XMMRegister nds, XMMRegister src, int imm8) {
3623   assert(VM_Version::supports_avx2(), "");
3624   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3625   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3626   emit_int8(0x46);
3627   emit_int8(0xC0 | encode);
3628   emit_int8(imm8);
3629 }
3630 
3631 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3632   assert(VM_Version::supports_avx(), "");
3633   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3634   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3635   emit_int8(0x06);
3636   emit_int8(0xC0 | encode);
3637   emit_int8(imm8);
3638 }
3639 
3640 
// PAUSE (F3 90): spin-wait hint for busy-wait loops.
void Assembler::pause() {
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)0x90);
}
3645 
// UD2 (0F 0B): guaranteed undefined-instruction trap.
void Assembler::ud2() {
  emit_int8(0x0F);
  emit_int8(0x0B);
}
3650 
// PCMPESTRI xmm, m128, imm8 (66 0F3A 61): explicit-length string compare,
// result index in ECX.
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x61);
  emit_operand(dst, src);
  emit_int8(imm8);
}
3660 
// PCMPESTRI xmm, xmm, imm8 (66 0F3A 61): explicit-length string compare,
// result index in ECX.
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x61);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
3669 
3670 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
// PCMPEQB xmm, xmm (66 0F 74): byte-wise equality compare; equal bytes
// become 0xFF in dst, unequal bytes become 0x00.
void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_int8((unsigned char)(0xC0 | encode));
}
3678 
3679 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
// VPCMPEQB (VEX 66 0F 74): vector byte-wise equality compare into dst.
// VEX-only: the EVEX form writes a k register instead (see evpcmpeqb).
void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
  assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_int8((unsigned char)(0xC0 | encode));
}
3688 
3689 // In this context, kdst is written the mask used to process the equal components
// EVEX VPCMPEQB k, xmm, xmm: byte-wise equality compare; result mask
// written to opmask register kdst.
void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_int8((unsigned char)(0xC0 | encode));
}
3698 
// EVEX VPCMPGTB k, xmm, m: signed byte greater-than compare against memory;
// result mask written to kdst.
void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x64);
  // emit_operand wants a Register; only its encoding (the k register number)
  // is used for the ModRM reg field.
  emit_operand(as_Register(dst_enc), src);
}
3710 
// EVEX VPCMPGTB k{mask}, xmm, m: masked signed byte greater-than compare;
// only lanes enabled in 'mask' are written to kdst.
void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
  assert(is_vector_masking(), "");
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  // Merge (not zero) masked lanes, and embed the opmask in the EVEX prefix.
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x64);
  emit_operand(as_Register(dst_enc), src);
}
3725 
// EVEX VPCMPUW k, xmm, xmm, imm8 (66 0F3A 3E): unsigned word compare with
// predicate vcc; result mask written to kdst.
void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3E);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(vcc); // comparison predicate immediate
}
3735 
// EVEX VPCMPUW k{mask}, xmm, xmm, imm8: masked unsigned word compare with
// predicate vcc; only lanes enabled in 'mask' are written to kdst.
void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
  assert(is_vector_masking(), "");
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  // Merge (not zero) masked lanes, and embed the opmask in the EVEX prefix.
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3E);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(vcc); // comparison predicate immediate
}
3748 
3749 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3750   assert(VM_Version::supports_avx512vlbw(), "");
3751   InstructionMark im(this);
3752   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3753   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3754   attributes.set_is_evex_instruction();
3755   int dst_enc = kdst->encoding();
3756   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3757   emit_int8(0x3E);
3758   emit_operand(as_Register(dst_enc), src);
3759   emit_int8(vcc);
3760 }
3761 
// EVEX VPCMPEQB k, xmm, m: byte-wise equality compare against memory;
// result mask written to kdst.
void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_operand(as_Register(dst_enc), src);
}
3773 
// EVEX VPCMPEQB k{mask}, xmm, m: masked byte-wise equality compare against
// memory; only lanes enabled in 'mask' are written to kdst.
void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(is_vector_masking(), "");    // For stub code use only
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  // Merge (not zero) masked lanes, and embed the opmask in the EVEX prefix.
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_operand(as_Register(kdst->encoding()), src);
}
3787 
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
// SSE2 PCMPEQW (66 0F 75 /r): packed-word equality compare, register-register form.
void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x75);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3796 
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
// VEX-encoded VPCMPEQW; limited to 128/256-bit since the EVEX form targets a k register instead.
void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
  assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x75);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3806 
// In this context, kdst is written the mask used to process the equal components
// EVEX-encoded word-equality compare writing opmask kdst.  Requires AVX512BW.
void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x75);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3816 
// Memory-source form of the EVEX word-equality compare above.  Requires AVX512BW.
void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x75);                         // opcode
  emit_operand(as_Register(dst_enc), src); // ModRM/SIB/disp for the memory source
}
3828 
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
// SSE2 PCMPEQD (66 0F 76 /r): packed-doubleword equality compare, register-register form.
void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x76);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3837 
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
// VEX-encoded VPCMPEQD; limited to 128/256-bit since the EVEX form targets a k register instead.
void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
  assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x76);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3847 
// In this context, kdst is written the mask used to process the equal components
// EVEX doubleword-equality compare writing opmask kdst, with an embedded opmask
// 'mask' gating which lanes are compared (merge semantics via reset_is_clear_context).
void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x76);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3859 
// Memory-source form of the masked EVEX doubleword-equality compare above.
// EVEX_FV/32-bit address attributes enable compressed displacement scaling.
void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  attributes.set_is_evex_instruction();
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x76);                         // opcode
  emit_operand(as_Register(dst_enc), src); // ModRM/SIB/disp for the memory source
}
3873 
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
// SSE4.1 PCMPEQQ (66 0F38 29 /r): packed-quadword equality compare, register-register form.
void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x29);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3882 
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
// VEX-encoded VPCMPEQQ (0F 38 map, opcode 0x29).
void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x29);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3891 
// In this context, kdst is written the mask used to process the equal components
// EVEX quadword-equality compare writing opmask kdst (rex_w set for 64-bit elements).
void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.reset_is_clear_context();
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x29);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3902 
// In this context, kdst is written the mask used to process the equal components
// Memory-source form of the EVEX quadword-equality compare above.
// EVEX_FV/64-bit address attributes enable compressed displacement scaling.
void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.reset_is_clear_context();
  attributes.set_is_evex_instruction();
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x29);                         // opcode
  emit_operand(as_Register(dst_enc), src); // ModRM/SIB/disp for the memory source
}
3916 
3917 void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) {
3918   assert(VM_Version::supports_sse4_1(), "");
3919   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3920   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3921   emit_int8(0x37);
3922   emit_int8((unsigned char)(0xC0 | encode));
3923 }
3924 
// SSE2 PMOVMSKB (66 0F D7 /r): extract the byte sign-mask of src into GPR dst.
// The GPR encoding is threaded through as an XMM register number for prefix emission.
void Assembler::pmovmskb(Register dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD7);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3932 
// AVX2 VPMOVMSKB, 256-bit form: extract the byte sign-mask of src into GPR dst.
void Assembler::vpmovmskb(Register dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD7);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
3940 
// SSE4.1 PEXTRD (66 0F3A 16 /r ib): extract the doubleword selected by imm8
// from src into GPR dst.  Note src is the first operand to the prefix helper:
// for PEXTR* the XMM register goes in the ModRM reg field, the GPR in r/m.
void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x16);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
  emit_int8(imm8);                            // element selector
}
3949 
// Memory-destination form of PEXTRD: store the doubleword selected by imm8 to dst.
void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x16);        // opcode
  emit_operand(src, dst); // ModRM/SIB/disp for the memory destination
  emit_int8(imm8);        // element selector
}
3959 
// PEXTRQ (66 REX.W 0F3A 16 /r ib): extract the quadword selected by imm8 into
// GPR dst (rex_w = true distinguishes it from PEXTRD).
void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x16);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
  emit_int8(imm8);                            // element selector
}
3968 
// Memory-destination form of PEXTRQ: store the quadword selected by imm8 to dst.
void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x16);        // opcode
  emit_operand(src, dst); // ModRM/SIB/disp for the memory destination
  emit_int8(imm8);        // element selector
}
3978 
// SSE2 PEXTRW (66 0F C5 /r ib): extract the word selected by imm8 into GPR dst.
// This legacy 0F-map form takes the GPR in the ModRM reg field (unlike PEXTRD/Q).
void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC5);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
  emit_int8(imm8);                            // element selector
}
3987 
// Memory-destination PEXTRW uses the SSE4.1 0F3A-map encoding (opcode 0x15);
// the legacy 0F C5 form above has no memory-destination variant.
void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x15); // opcode
  emit_operand(src, dst);         // ModRM/SIB/disp for the memory destination
  emit_int8(imm8);                // element selector
}
3997 
// SSE4.1 PEXTRB (66 0F3A 14 /r ib): extract the byte selected by imm8 into GPR dst.
void Assembler::pextrb(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x14);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
  emit_int8(imm8);                            // element selector
}
4006 
// Memory-destination form of PEXTRB: store the byte selected by imm8 to dst.
void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x14);        // opcode
  emit_operand(src, dst); // ModRM/SIB/disp for the memory destination
  emit_int8(imm8);        // element selector
}
4016 
// SSE4.1 PINSRD (66 0F3A 22 /r ib): insert GPR src as the doubleword lane
// selected by imm8 of dst.
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x22);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
  emit_int8(imm8);                            // lane selector
}
4025 
// Memory-source form of PINSRD: insert the doubleword at src into the lane
// selected by imm8 of dst.
void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x22);       // opcode
  emit_operand(dst,src); // ModRM/SIB/disp for the memory source
  emit_int8(imm8);       // lane selector
}
4035 
// PINSRQ (66 REX.W 0F3A 22 /r ib): insert GPR src as the quadword lane selected
// by imm8 of dst (rex_w = true distinguishes it from PINSRD).
void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x22);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
  emit_int8(imm8);                            // lane selector
}
4044 
// Memory-source form of PINSRQ: insert the quadword at src into the lane
// selected by imm8 of dst.
void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x22);        // opcode
  emit_operand(dst, src); // ModRM/SIB/disp for the memory source
  emit_int8(imm8);        // lane selector
}
4054 
// SSE2 PINSRW (66 0F C4 /r ib): insert the low word of GPR src into the lane
// selected by imm8 of dst (legacy 0F-map encoding, unlike PINSRD/Q).
void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC4);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
  emit_int8(imm8);                            // lane selector
}
4063 
// Memory-source form of PINSRW: insert the word at src into the lane selected
// by imm8 of dst.
void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC4); // opcode
  emit_operand(dst, src);         // ModRM/SIB/disp for the memory source
  emit_int8(imm8);                // lane selector
}
4073 
// SSE4.1 PINSRB (66 0F3A 20 /r ib), memory-source form: insert the byte at src
// into the lane selected by imm8 of dst.
void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x20);        // opcode
  emit_operand(dst, src); // ModRM/SIB/disp for the memory source
  emit_int8(imm8);        // lane selector
}
4083 
// SSE4.1 PMOVZXBW (66 0F38 30 /r), memory-source form: zero-extend packed
// bytes at src to words in dst (EVEX_HVM: half-vector memory tuple).
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);        // opcode
  emit_operand(dst, src); // ModRM/SIB/disp for the memory source
}
4093 
// Register-register form of PMOVZXBW: zero-extend packed bytes to words.
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4101 
// SSE4.1 PMOVZXDQ (66 0F38 35 /r): zero-extend packed doublewords to quadwords.
void Assembler::pmovzxdq(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x35);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4109 
// SSE4.1 PMOVSXBW (66 0F38 20 /r): sign-extend packed bytes to words.
void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x20);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4117 
// SSE4.1 PMOVSXBD (66 0F38 21 /r): sign-extend packed bytes to doublewords.
void Assembler::pmovsxbd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x21);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4125 
// SSE4.1 PMOVSXBQ (66 0F38 22 /r): sign-extend packed bytes to quadwords.
void Assembler::pmovsxbq(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x22);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4133 
// VEX-encoded VPMOVZXBW, memory-source form: zero-extend packed bytes at src
// to words in dst (EVEX_HVM: half-vector memory tuple).
void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  assert(dst != xnoreg, "sanity");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);        // opcode
  emit_operand(dst, src); // ModRM/SIB/disp for the memory source
}
4144 
// EVEX-encoded, opmask-gated VPMOVZXBW from memory: zero-extend bytes at src
// to words in dst, with 'mask' embedded as the EVEX opmask specifier.
// Restricted to stub code (see is_vector_masking assert).  Requires AVX512VL+BW.
void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
  assert(is_vector_masking(), "");
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);        // opcode
  emit_operand(dst, src); // ModRM/SIB/disp for the memory source
}
4158 
// VPMOVZXDQ, register-register form: zero-extend packed doublewords to
// quadwords (256-bit form requires AVX2).
void Assembler::vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x35);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4166 
// VPMOVZXBD, register-register form: zero-extend packed bytes to doublewords.
void Assembler::vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x31);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4174 
// VPMOVZXBQ, register-register form: zero-extend packed bytes to quadwords.
void Assembler::vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x32);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4182 
// VPMOVSXBD, register-register form: sign-extend packed bytes to doublewords.
void Assembler::vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x21);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4190 
// VPMOVSXBQ, register-register form: sign-extend packed bytes to quadwords.
void Assembler::vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x22);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4198 
// VPMOVSXBW, register-register form: sign-extend packed bytes to words.
void Assembler::vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x20);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4206 
// EVEX VPMOVWB (F3-prefixed, 0F 38 map, opcode 0x30): down-convert packed
// words in src to bytes and store them at memory destination dst.
// Requires AVX512VL+BW.
void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);        // opcode
  emit_operand(src, dst); // ModRM/SIB/disp for the memory destination
}
4218 
// Opmask-gated variant of the VPMOVWB store above: 'mask' is embedded as the
// EVEX opmask specifier (merge semantics via reset_is_clear_context).
// Restricted to stub code (see is_vector_masking assert).
void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
  assert(is_vector_masking(), "");
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);        // opcode
  emit_operand(src, dst); // ModRM/SIB/disp for the memory destination
}
4233 
// generic
// POP r64/r32: single-byte opcode 0x58 plus the register number (REX handled
// by prefix_and_encode for high registers).
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8(0x58 | encode);
}
4239 
// POPCNT r32, m32 (F3 0F B8 /r): population count of the memory operand.
// The mandatory F3 prefix is emitted before any REX prefix from prefix().
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_int8((unsigned char)0xF3); // mandatory prefix, precedes REX
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB8); // opcode
  emit_operand(dst, src);         // ModRM/SIB/disp for the memory source
}
4249 
// POPCNT r32, r32 (F3 0F B8 /r): population count, register-register form.
void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_int8((unsigned char)0xF3); // mandatory prefix, precedes REX
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB8);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4258 
// AVX512_VPOPCNTDQ VPOPCNTD (EVEX, 0F 38 map, opcode 0x55): per-doubleword
// population count of src into dst.
void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_vpopcntdq(), "must support vpopcntdq feature");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x55);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4267 
// POPF: pop the flags register from the stack (single-byte opcode 0x9D).
void Assembler::popf() {
  emit_int8((unsigned char)0x9D);
}
4271 
#ifndef _LP64 // no 32bit push/pop on amd64
// POP m32 (8F /0): pop the top of stack into a memory operand.
// Compiled only for 32-bit builds (see the _LP64 guard); on 64-bit the
// default operand size of this encoding would pop 8 bytes instead.
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x8F);
  emit_operand(rax, dst); // /0 extension encoded via rax in the reg field
}
#endif
4281 
// Shared helper for the prefetch emitters below: address-size/REX prefix
// followed by the 0x0F escape byte.
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_int8(0x0F);
}
4286 
// PREFETCHNTA (0F 18 /0): non-temporal prefetch hint for src.
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rax, src); // 0, src
}
4294 
// 3DNow! PREFETCH (0F 0D /0): prefetch-for-read hint for src.
void Assembler::prefetchr(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x0D);
  emit_operand(rax, src); // 0, src
}
4302 
// PREFETCHT0 (0F 18 /1): prefetch into all cache levels.
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rcx, src); // 1, src
}
4310 
// PREFETCHT1 (0F 18 /2): prefetch into L2 and higher cache levels.
void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rdx, src); // 2, src
}
4318 
// PREFETCHT2 (0F 18 /3): prefetch into L3 and higher cache levels.
void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rbx, src); // 3, src
}
4326 
// 3DNow! PREFETCHW (0F 0D /1): prefetch-for-write hint for src.
void Assembler::prefetchw(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x0D);
  emit_operand(rcx, src); // 1, src
}
4334 
// Emit a single raw prefix byte (e.g. segment override or lock).
void Assembler::prefix(Prefix p) {
  emit_int8(p);
}
4338 
// SSSE3 PSHUFB (66 0F38 00 /r): byte shuffle of dst using src as the control mask.
void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x00);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4346 
// VEX/EVEX-encoded VPSHUFB: byte shuffle of nds using src as the control mask.
void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
         vector_len == AVX_256bit? VM_Version::supports_avx2() :
         0, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x00);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
4356 
// Memory-source form of PSHUFB: the shuffle control mask is loaded from src.
void Assembler::pshufb(XMMRegister dst, Address src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x00);        // opcode
  emit_operand(dst, src); // ModRM/SIB/disp for the memory source
}
4366 
// PSHUFD (66 0F 70 /r ib): shuffle doublewords of src into dst per the imm8
// 'mode'.  On AVX-512 parts without VL support the full 512-bit vector length
// is selected so the EVEX encoding stays legal.
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);                            // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
  emit_int8(mode & 0xFF);                     // shuffle-control immediate
}
4377 
4378 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
4379   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4380          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4381          0, "");
4382   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4383   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4384   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4385   emit_int8(0x70);
4386   emit_int8((unsigned char)(0xC0 | encode));
4387   emit_int8(mode & 0xFF);
4388 }
4389 
4390 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
4391   assert(isByte(mode), "invalid value");
4392   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4393   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4394   InstructionMark im(this);
4395   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4396   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4397   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4398   emit_int8(0x70);
4399   emit_operand(dst, src);
4400   emit_int8(mode & 0xFF);
4401 }
4402 
// PSHUFLW xmm, xmm, imm8 — F2 0F 70 /r ib: shuffle the low four words of
// src into dst; the high quadword is passed through.
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(mode & 0xFF);
}

// PSHUFLW xmm, m128, imm8 — memory-source form (AVX-only, see alignment assert).
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_operand(dst, src);
  emit_int8(mode & 0xFF);
}
4425 
void Assembler::psrldq(XMMRegister dst, int shift) {
  // Shift right 128 bit value in dst XMMRegister by shift number of bytes.
  // (PSRLDQ is the byte-wise logical RIGHT shift; the left shift is pslldq below.)
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  // XMM3 is for /3 encoding: 66 0F 73 /3 ib
  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift);
}
4435 
void Assembler::pslldq(XMMRegister dst, int shift) {
  // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  // XMM7 is for /7 encoding: 66 0F 73 /7 ib
  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift);
}
4446 
// PTEST xmm, m128 — 66 0F 38 17 (SSE4.1). AVX-only here: the legacy SSE
// encoding would require a 16-byte-aligned memory operand.
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_operand(dst, src);
}

// PTEST xmm, xmm — register form.
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_int8((unsigned char)(0xC0 | encode));
}

// VPTEST ymm, m256 — 256-bit AVX form, memory source.
void Assembler::vptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  assert(dst != xnoreg, "sanity");
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_operand(dst, src);
}

// VPTEST ymm, ymm — 256-bit AVX form, register source.
void Assembler::vptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_int8((unsigned char)(0xC0 | encode));
}

// VPTEST with caller-chosen vector length.
void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_int8((unsigned char)(0xC0 | encode));
}
4491 
// PUNPCKLBW xmm, m128 — 66 0F 60: interleave low bytes of dst and src.
void Assembler::punpcklbw(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x60);
  emit_operand(dst, src);
}

// PUNPCKLBW xmm, xmm — register form.
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x60);
  emit_int8((unsigned char)(0xC0 | encode));
}

// PUNPCKLDQ xmm, m128 — 66 0F 62: interleave low dwords of dst and src.
void Assembler::punpckldq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x62);
  emit_operand(dst, src);
}

// PUNPCKLDQ xmm, xmm — register form.
void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x62);
  emit_int8((unsigned char)(0xC0 | encode));
}

// PUNPCKLQDQ xmm, xmm — 66 0F 6C: interleave low quadwords. rex_w is set
// (and marked reverted) to get the correct EVEX.W encoding on AVX-512.
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6C);
  emit_int8((unsigned char)(0xC0 | encode));
}
4538 
// PUSH imm32 — 68 id.
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_int8(0x68);
  emit_int32(imm32);
}

// PUSH r — single-byte 50+rd encoding (REX prefix added for r8-r15).
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_int8(0x50 | encode);
}

// PUSHF/PUSHFQ — 9C: push the flags register.
void Assembler::pushf() {
  emit_int8((unsigned char)0x9C);
}
4555 
#ifndef _LP64 // no 32bit push/pop on amd64
void Assembler::pushl(Address src) {
  // PUSH r/m32 — FF /6; 'rsi' (encoding 6) supplies the /6 reg field.
  // (Compiled on 32-bit only, per the guard above.)
  InstructionMark im(this);
  prefix(src);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsi, src);
}
#endif
4565 
4566 void Assembler::rcll(Register dst, int imm8) {
4567   assert(isShiftCount(imm8), "illegal shift count");
4568   int encode = prefix_and_encode(dst->encoding());
4569   if (imm8 == 1) {
4570     emit_int8((unsigned char)0xD1);
4571     emit_int8((unsigned char)(0xD0 | encode));
4572   } else {
4573     emit_int8((unsigned char)0xC1);
4574     emit_int8((unsigned char)0xD0 | encode);
4575     emit_int8(imm8);
4576   }
4577 }
4578 
// RCPPS xmm, xmm — 0F 53: packed single-precision reciprocal approximation.
void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x53);
  emit_int8((unsigned char)(0xC0 | encode));
}

// RCPSS xmm, xmm — F3 0F 53: scalar single-precision reciprocal approximation.
void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x53);
  emit_int8((unsigned char)(0xC0 | encode));
}

// RDTSC — 0F 31: read the time-stamp counter into EDX:EAX.
void Assembler::rdtsc() {
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0x31);
}
4599 
// copies data from [esi] to [edi] using rcx pointer sized words
// generic
void Assembler::rep_mov() {
  emit_int8((unsigned char)0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xA5);
}

// sets rcx bytes at [edi] with the value in al (REP STOSB)
void Assembler::rep_stosb() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xAA); // STOSB
}

// sets rcx pointer sized words with rax, value at [edi]
// generic
void Assembler::rep_stos() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
  emit_int8((unsigned char)0xAB);
}

// scans rcx pointer sized words at [edi] for occurrence of rax,
// generic
void Assembler::repne_scan() { // repne_scan
  emit_int8((unsigned char)0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xAF);
}
4632 
#ifdef _LP64
// scans rcx 4 byte words at [edi] for occurrence of rax,
// generic
void Assembler::repne_scanl() { // repne_scan
  emit_int8((unsigned char)0xF2);
  // SCASL — no REX.W, so this scans 32-bit words even on 64-bit.
  emit_int8((unsigned char)0xAF);
}
#endif
4642 
// RET — C3, or the C2 iw form which additionally pops imm16 bytes of
// arguments from the stack.
void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_int8((unsigned char)0xC3);
  } else {
    emit_int8((unsigned char)0xC2);
    emit_int16(imm16);
  }
}

// SAHF — 9E: store AH into the low byte of the flags register.
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_int8((unsigned char)0x9E);
}
4659 
// SAR r/m32, imm8 — arithmetic right shift; uses the shorter D1 /7
// encoding when the count is 1.
void Assembler::sarl(Register dst, int imm8) {
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xF8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xF8 | encode));
    emit_int8(imm8);
  }
}

// SAR r/m32, CL — D3 /7: arithmetic right shift by the count in CL.
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode));
}

// SBB m32, imm32 — 81 /3: subtract with borrow, immediate into memory.
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

// SBB r32, imm32.
void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}


// SBB r32, m32 — 1B /r.
void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}

// SBB r32, r32.
void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
4702 
4703 void Assembler::setb(Condition cc, Register dst) {
4704   assert(0 <= cc && cc < 16, "illegal cc");
4705   int encode = prefix_and_encode(dst->encoding(), true);
4706   emit_int8(0x0F);
4707   emit_int8((unsigned char)0x90 | cc);
4708   emit_int8((unsigned char)(0xC0 | encode));
4709 }
4710 
// PALIGNR xmm, xmm, imm8 — 66 0F 3A 0F (SSSE3): byte-align the
// concatenation of dst:src right by imm8 bytes.
void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

// VPALIGNR — AVX/AVX2 three-operand form (256-bit needs AVX2).
void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
  assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
         vector_len == AVX_256bit? VM_Version::supports_avx2() :
         0, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

// PBLENDW xmm, xmm, imm8 — 66 0F 3A 0E (SSE4.1): blend words under the
// imm8 bit mask.
void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x0E);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
4739 
// Intel SHA extensions. All use plain legacy (REX) encoding, not VEX/EVEX.
// Note: sha1rnds4 and sha256msg1 both use opcode byte 0xCC, but in
// different opcode maps (0F 3A vs 0F 38), so there is no conflict.

// SHA1RNDS4 xmm, xmm, imm8 — 0F 3A CC.
void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
  emit_int8((unsigned char)0xCC);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)imm8);
}

// SHA1NEXTE xmm, xmm — 0F 38 C8.
void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xC8);
  emit_int8((unsigned char)(0xC0 | encode));
}

// SHA1MSG1 xmm, xmm — 0F 38 C9.
void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xC9);
  emit_int8((unsigned char)(0xC0 | encode));
}

// SHA1MSG2 xmm, xmm — 0F 38 CA.
void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCA);
  emit_int8((unsigned char)(0xC0 | encode));
}

// xmm0 is implicit additional source to this instruction.
// SHA256RNDS2 xmm, xmm — 0F 38 CB.
void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCB);
  emit_int8((unsigned char)(0xC0 | encode));
}

// SHA256MSG1 xmm, xmm — 0F 38 CC.
void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCC);
  emit_int8((unsigned char)(0xC0 | encode));
}

// SHA256MSG2 xmm, xmm — 0F 38 CD.
void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCD);
  emit_int8((unsigned char)(0xC0 | encode));
}
4790 
4791 
// SHL r/m32, imm8 — left shift; uses the shorter D1 /4 encoding when the
// count is 1.
void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1 ) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}

// SHL r/m32, CL — D3 /4.
void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}
4810 
4811 void Assembler::shrl(Register dst, int imm8) {
4812   assert(isShiftCount(imm8), "illegal shift count");
4813   int encode = prefix_and_encode(dst->encoding());
4814   emit_int8((unsigned char)0xC1);
4815   emit_int8((unsigned char)(0xE8 | encode));
4816   emit_int8(imm8);
4817 }
4818 
// SHR r/m32, CL — D3 /5: logical right shift by the count in CL.
void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
}

// copies a single word from [esi] to [edi]
void Assembler::smovl() {
  emit_int8((unsigned char)0xA5);
}
4829 
// SQRTSD xmm, xmm — F2 0F 51: scalar double-precision square root.
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}

// SQRTSD xmm, m64 — memory-source form.
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}

// SQRTSS xmm, xmm — F3 0F 51: scalar single-precision square root.
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}

// STD — FD: set the direction flag (string ops count down).
void Assembler::std() {
  emit_int8((unsigned char)0xFD);
}
4861 
// SQRTSS xmm, m32 — memory-source form.
void Assembler::sqrtss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}

// STMXCSR m32 — store the MXCSR register. Emits VSTMXCSR (VEX 0F AE /3)
// when AVX is in use, else the legacy 0F AE /3 encoding; as_Register(3)
// supplies the /3 reg field in both cases.
void Assembler::stmxcsr( Address dst) {
  if (UseAVX > 0 ) {
    assert(VM_Version::supports_avx(), "");
    InstructionMark im(this);
    InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
    vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
    emit_int8((unsigned char)0xAE);
    emit_operand(as_Register(3), dst);
  } else {
    NOT_LP64(assert(VM_Version::supports_sse(), ""));
    InstructionMark im(this);
    prefix(dst);
    emit_int8(0x0F);
    emit_int8((unsigned char)0xAE);
    emit_operand(as_Register(3), dst);
  }
}
4889 
// SUB m32, imm32 — 81 /5 (rbp encodes the /5 reg field).
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

// SUB m32, r32 — 29 /r.
void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}

// SUB r32, imm32 (emit_arith may pick the sign-extended imm8 form).
void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
// (needed when the instruction will be patched and must keep its size).
void Assembler::subl_imm32(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}

// SUB r32, m32 — 2B /r.
void Assembler::subl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}

// SUB r32, r32.
void Assembler::subl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
4925 
// SUBSD xmm, xmm — F2 0F 5C: scalar double-precision subtract.
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// SUBSD xmm, m64 — memory-source form.
void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

// SUBSS xmm, xmm — F3 0F 5C: scalar single-precision subtract.
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// SUBSS xmm, m32 — memory-source form.
void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}
4963 
// TEST r8, imm8 — byte-register form; the 'true' argument requests
// byte-register encoding (REX where needed on 64-bit).
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

// TEST m8, imm8 — F6 /0; the trailing '1' tells emit_operand an
// immediate byte follows the operand bytes.
void Assembler::testb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xF6);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}

// TEST r32, imm32 — uses the short A9 form when dst is eax/rax.
void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefix_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_int32(imm32);
}

// TEST r32, r32 — 85 /r.
void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// TEST r32, m32.
void Assembler::testl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x85);
  emit_operand(dst, src);
}
5004 
5005 void Assembler::tzcntl(Register dst, Register src) {
5006   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
5007   emit_int8((unsigned char)0xF3);
5008   int encode = prefix_and_encode(dst->encoding(), src->encoding());
5009   emit_int8(0x0F);
5010   emit_int8((unsigned char)0xBC);
5011   emit_int8((unsigned char)0xC0 | encode);
5012 }
5013 
5014 void Assembler::tzcntq(Register dst, Register src) {
5015   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
5016   emit_int8((unsigned char)0xF3);
5017   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5018   emit_int8(0x0F);
5019   emit_int8((unsigned char)0xBC);
5020   emit_int8((unsigned char)(0xC0 | encode));
5021 }
5022 
// UCOMISD xmm, m64 — 66 0F 2E: unordered double compare, sets EFLAGS.
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_operand(dst, src);
}

// UCOMISD xmm, xmm — register form.
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_int8((unsigned char)(0xC0 | encode));
}

// UCOMISS xmm, m32 — 0F 2E: unordered single compare, sets EFLAGS.
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_operand(dst, src);
}

// UCOMISS xmm, xmm — register form.
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5060 
// XABORT imm8 — C6 F8 ib (RTM): abort the current transaction with the
// given status code.
void Assembler::xabort(int8_t imm8) {
  emit_int8((unsigned char)0xC6);
  emit_int8((unsigned char)0xF8);
  emit_int8((unsigned char)(imm8 & 0xFF));
}

// XADD m8, r8 — 0F C0; 'true' requests byte-register encoding.
void Assembler::xaddb(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC0);
  emit_operand(src, dst);
}

// XADD m16, r16 — 66-prefixed 0F C1.
void Assembler::xaddw(Address dst, Register src) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}

// XADD m32, r32 — 0F C1.
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}
5091 
// XBEGIN rel32 — C7 F8 cd (RTM): start a transaction; on abort, control
// transfers to the 'abort' label. An unbound label gets a 0 displacement
// that is fixed up later via the patch list.
void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  if (abort.is_bound()) {
    address entry = target(abort);
    assert(entry != NULL, "abort entry NULL");
    intptr_t offset = entry - pc();
    emit_int8((unsigned char)0xC7);
    emit_int8((unsigned char)0xF8);
    emit_int32(offset - 6); // 2 opcode + 4 address
  } else {
    abort.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xC7);
    emit_int8((unsigned char)0xF8);
    emit_int32(0);
  }
}
5109 
5110 void Assembler::xchgb(Register dst, Address src) { // xchg
5111   InstructionMark im(this);
5112   prefix(src, dst, true);
5113   emit_int8((unsigned char)0x86);
5114   emit_operand(dst, src);
5115 }
5116 
// XCHG word register with memory: 66-prefixed 87 /r.
void Assembler::xchgw(Register dst, Address src) { // xchg
  InstructionMark im(this);
  emit_int8(0x66); // operand-size override
  prefix(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}
5124 
// XCHG dword register with memory: 87 /r.
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}
5131 
// XCHG dword register with register: 87 /r, register-direct (ModRM 0xC0 | encode).
void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
}
5137 
// XEND -- commit the current RTM transaction. Encoding: 0F 01 D5.
void Assembler::xend() {
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0x01);
  emit_int8((unsigned char)0xD5);
}
5143 
// XGETBV -- read extended control register (XCR) selected by ECX into EDX:EAX.
// Encoding: 0F 01 D0.
void Assembler::xgetbv() {
  emit_int8(0x0F);
  emit_int8(0x01);
  emit_int8((unsigned char)0xD0);
}
5149 
// XOR r32, imm32: 81 /6 id (emit_arith may shorten to the imm8 form).
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}
5154 
// XOR r32, m32: 33 /r.
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}
5161 
// XOR r32, r32: 33 /r, register-direct form via emit_arith.
void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}
5166 
// XOR r8, m8: 32 /r.
void Assembler::xorb(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x32);
  emit_operand(dst, src);
}
5173 
5174 // AVX 3-operands scalar float-point arithmetic instructions
5175 
// VADDSD xmm, xmm, m64 -- 3-operand scalar double add (opcode 0x58, F2 0F).
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  // vex_w is pre-set for EVEX; set_rex_vex_w_reverted() lets the prefix code
  // drop W again if the instruction ends up VEX-encoded.
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // T1S/64-bit input drives EVEX disp8*N displacement compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
5186 
// VADDSD xmm, xmm, xmm -- register form of the scalar double add.
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode)); // ModRM register-direct
}
5195 
// VADDSS xmm, xmm, m32 -- 3-operand scalar single add (opcode 0x58, F3 0F).
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // 32-bit scalar input for EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
5205 
// VADDSS xmm, xmm, xmm -- register form of the scalar single add.
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
5213 
// VDIVSD xmm, xmm, m64 -- 3-operand scalar double divide (opcode 0x5E, F2 0F).
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}
5224 
// VDIVSD xmm, xmm, xmm -- register form of the scalar double divide.
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5233 
// VDIVSS xmm, xmm, m32 -- 3-operand scalar single divide (opcode 0x5E, F3 0F).
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}
5243 
// VDIVSS xmm, xmm, xmm -- register form of the scalar single divide.
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5251 
// VFMADD231SD xmm, xmm, xmm -- fused multiply-add, scalar double
// (66 0F38 B9, W1). Requires FMA3 support.
void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB9);
  emit_int8((unsigned char)(0xC0 | encode));
}
5259 
// VFMADD231SS xmm, xmm, xmm -- fused multiply-add, scalar single
// (66 0F38 B9, W0). Requires FMA3 support.
void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB9);
  emit_int8((unsigned char)(0xC0 | encode));
}
5267 
// VMULSD xmm, xmm, m64 -- 3-operand scalar double multiply (opcode 0x59, F2 0F).
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
5278 
// VMULSD xmm, xmm, xmm -- register form of the scalar double multiply.
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5287 
// VMULSS xmm, xmm, m32 -- 3-operand scalar single multiply (opcode 0x59, F3 0F).
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
5297 
// VMULSS xmm, xmm, xmm -- register form of the scalar single multiply.
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5305 
// VSUBSD xmm, xmm, m64 -- 3-operand scalar double subtract (opcode 0x5C, F2 0F).
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}
5316 
// VSUBSD xmm, xmm, xmm -- register form of the scalar double subtract.
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}
5325 
// VSUBSS xmm, xmm, m32 -- 3-operand scalar single subtract (opcode 0x5C, F3 0F).
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}
5335 
// VSUBSS xmm, xmm, xmm -- register form of the scalar single subtract.
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}
5343 
5344 //====================VECTOR ARITHMETIC=====================================
5345 
5346 // Float-point vector arithmetic
5347 
// ADDPD xmm, xmm -- packed double add (66 0F 58), 2-operand legacy SSE form;
// encoded via simd_prefix so it can become VEX/EVEX when AVX is active.
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
5356 
// ADDPD xmm, m128 -- memory form of the packed double add.
void Assembler::addpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // FV tuple with 64-bit elements for EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
5367 
5368 
// ADDPS xmm, xmm -- packed single add (0F 58, no SIMD prefix).
// NOTE(review): asserts sse2 although ADDPS is an SSE1 instruction; this is
// conservative (sse2 implies sse) and matches sibling ps emitters in this file.
void Assembler::addps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
5376 
// VADDPD -- 3-operand packed double add at the requested vector length.
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
5385 
// VADDPS -- 3-operand packed single add at the requested vector length.
void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
5393 
// VADDPD with memory source; FV/64-bit address attributes for EVEX disp8*N.
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
5404 
// VADDPS with memory source; FV/32-bit address attributes for EVEX disp8*N.
void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
5414 
// SUBPD xmm, xmm -- packed double subtract (66 0F 5C).
void Assembler::subpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}
5423 
// SUBPS xmm, xmm -- packed single subtract (0F 5C).
void Assembler::subps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}
5431 
// VSUBPD -- 3-operand packed double subtract at the requested vector length.
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}
5440 
// VSUBPS -- 3-operand packed single subtract at the requested vector length.
void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}
5448 
// VSUBPD with memory source; FV/64-bit address attributes for EVEX disp8*N.
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}
5459 
// VSUBPS with memory source; FV/32-bit address attributes for EVEX disp8*N.
void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}
5469 
// MULPD xmm, xmm -- packed double multiply (66 0F 59).
void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5478 
// MULPD xmm, m128 -- memory form of the packed double multiply.
void Assembler::mulpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
5489 
// MULPS xmm, xmm -- packed single multiply (0F 59).
void Assembler::mulps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5497 
// VMULPD -- 3-operand packed double multiply at the requested vector length.
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5506 
// VMULPS -- 3-operand packed single multiply at the requested vector length.
void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5514 
// VMULPD with memory source; FV/64-bit address attributes for EVEX disp8*N.
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
5525 
// VMULPS with memory source; FV/32-bit address attributes for EVEX disp8*N.
void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
5535 
// VFMADD231PD -- packed double fused multiply-add (66 0F38 B8, W1).
void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}
5543 
// VFMADD231PS -- packed single fused multiply-add (66 0F38 B8, W0).
void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}
5551 
// VFMADD231PD with memory source; FV/64-bit address attributes.
void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
  assert(VM_Version::supports_fma(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src2);
}
5561 
// VFMADD231PS with memory source; FV/32-bit address attributes.
void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
  assert(VM_Version::supports_fma(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src2);
}
5571 
// DIVPD xmm, xmm -- packed double divide (66 0F 5E).
void Assembler::divpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5580 
// DIVPS xmm, xmm -- packed single divide (0F 5E).
void Assembler::divps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5588 
// VDIVPD -- 3-operand packed double divide at the requested vector length.
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5597 
// VDIVPS -- 3-operand packed single divide at the requested vector length.
void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5605 
// VDIVPD with memory source; FV/64-bit address attributes for EVEX disp8*N.
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}
5616 
// VDIVPS with memory source; FV/32-bit address attributes for EVEX disp8*N.
void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}
5626 
// VSQRTPD -- packed double square root (66 0F 51); nds field unused (0).
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}
5635 
// VSQRTPD with memory source; FV/64-bit address attributes for EVEX disp8*N.
void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}
5646 
// VSQRTPS -- packed single square root (0F 51); nds field unused (0).
void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}
5654 
5655 void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
5656   assert(VM_Version::supports_avx(), "");
5657   InstructionMark im(this);
5658   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5659   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5660   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5661   emit_int8(0x51);
5662   emit_operand(dst, src);
5663 }
5664 
// ANDPD xmm, xmm -- packed double bitwise AND (66 0F 54). The EVEX form
// needs AVX-512DQ, hence legacy_mode is tied to _legacy_mode_dq.
void Assembler::andpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}
5673 
// ANDPS xmm, xmm -- packed single bitwise AND (0F 54); EVEX form needs AVX-512DQ.
void Assembler::andps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}
5681 
// ANDPS xmm, m128 -- memory form; FV/32-bit address attributes for EVEX disp8*N.
void Assembler::andps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}
5691 
// ANDPD xmm, m128 -- memory form; FV/64-bit address attributes for EVEX disp8*N.
void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}
5702 
5703 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5704   assert(VM_Version::supports_avx(), "");
5705   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5706   attributes.set_rex_vex_w_reverted();
5707   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5708   emit_int8(0x54);
5709   emit_int8((unsigned char)(0xC0 | encode));
5710 }
5711 
5712 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5713   assert(VM_Version::supports_avx(), "");
5714   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5715   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5716   emit_int8(0x54);
5717   emit_int8((unsigned char)(0xC0 | encode));
5718 }
5719 
// VANDPD dst, nds, m (VEX/EVEX.66.0F 54 /r): packed-double AND with memory operand.
void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}
5730 
// VANDPS dst, nds, m (VEX/EVEX.0F 54 /r): packed-single AND with memory operand.
void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}
5740 
5741 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
5742   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5743   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5744   attributes.set_rex_vex_w_reverted();
5745   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5746   emit_int8(0x15);
5747   emit_int8((unsigned char)(0xC0 | encode));
5748 }
5749 
5750 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
5751   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5752   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5753   attributes.set_rex_vex_w_reverted();
5754   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5755   emit_int8(0x14);
5756   emit_int8((unsigned char)(0xC0 | encode));
5757 }
5758 
5759 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
5760   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5761   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5762   attributes.set_rex_vex_w_reverted();
5763   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5764   emit_int8(0x57);
5765   emit_int8((unsigned char)(0xC0 | encode));
5766 }
5767 
5768 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
5769   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5770   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5771   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5772   emit_int8(0x57);
5773   emit_int8((unsigned char)(0xC0 | encode));
5774 }
5775 
// XORPD xmm, m128 (66 0F 57 /r): packed-double XOR with a memory operand.
void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}
5786 
// XORPS xmm, m128 (0F 57 /r): packed-single XOR with a memory operand.
void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}
5796 
5797 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5798   assert(VM_Version::supports_avx(), "");
5799   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5800   attributes.set_rex_vex_w_reverted();
5801   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5802   emit_int8(0x57);
5803   emit_int8((unsigned char)(0xC0 | encode));
5804 }
5805 
5806 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5807   assert(VM_Version::supports_avx(), "");
5808   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5809   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5810   emit_int8(0x57);
5811   emit_int8((unsigned char)(0xC0 | encode));
5812 }
5813 
// VXORPD dst, nds, m (VEX/EVEX.66.0F 57 /r): packed-double XOR with memory operand.
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}
5824 
// VXORPS dst, nds, m (VEX/EVEX.0F 57 /r): packed-single XOR with memory operand.
void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}
5834 
5835 // Integer vector arithmetic
5836 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5837   assert(VM_Version::supports_avx() && (vector_len == 0) ||
5838          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
5839   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5840   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5841   emit_int8(0x01);
5842   emit_int8((unsigned char)(0xC0 | encode));
5843 }
5844 
5845 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5846   assert(VM_Version::supports_avx() && (vector_len == 0) ||
5847          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
5848   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5849   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5850   emit_int8(0x02);
5851   emit_int8((unsigned char)(0xC0 | encode));
5852 }
5853 
5854 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
5855   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5856   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5857   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5858   emit_int8((unsigned char)0xFC);
5859   emit_int8((unsigned char)(0xC0 | encode));
5860 }
5861 
5862 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
5863   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5864   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5865   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5866   emit_int8((unsigned char)0xFD);
5867   emit_int8((unsigned char)(0xC0 | encode));
5868 }
5869 
5870 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
5871   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5872   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5873   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5874   emit_int8((unsigned char)0xFE);
5875   emit_int8((unsigned char)(0xC0 | encode));
5876 }
5877 
// PADDD xmm, m128 (66 0F FE /r): packed dword addition with a memory operand.
// NOTE(review): unlike vpaddd(Address), no EVEX tuple/input-size attributes are
// set here, so disp8*N compression may not apply under EVEX — confirm intended.
void Assembler::paddd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_operand(dst, src);
}
5886 
5887 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
5888   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5889   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5890   attributes.set_rex_vex_w_reverted();
5891   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5892   emit_int8((unsigned char)0xD4);
5893   emit_int8((unsigned char)(0xC0 | encode));
5894 }
5895 
5896 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
5897   assert(VM_Version::supports_sse3(), "");
5898   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5899   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5900   emit_int8(0x01);
5901   emit_int8((unsigned char)(0xC0 | encode));
5902 }
5903 
5904 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
5905   assert(VM_Version::supports_sse3(), "");
5906   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5907   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5908   emit_int8(0x02);
5909   emit_int8((unsigned char)(0xC0 | encode));
5910 }
5911 
5912 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5913   assert(UseAVX > 0, "requires some form of AVX");
5914   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5915   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5916   emit_int8((unsigned char)0xFC);
5917   emit_int8((unsigned char)(0xC0 | encode));
5918 }
5919 
5920 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5921   assert(UseAVX > 0, "requires some form of AVX");
5922   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5923   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5924   emit_int8((unsigned char)0xFD);
5925   emit_int8((unsigned char)(0xC0 | encode));
5926 }
5927 
5928 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5929   assert(UseAVX > 0, "requires some form of AVX");
5930   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5931   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5932   emit_int8((unsigned char)0xFE);
5933   emit_int8((unsigned char)(0xC0 | encode));
5934 }
5935 
5936 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5937   assert(UseAVX > 0, "requires some form of AVX");
5938   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5939   attributes.set_rex_vex_w_reverted();
5940   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5941   emit_int8((unsigned char)0xD4);
5942   emit_int8((unsigned char)(0xC0 | encode));
5943 }
5944 
// VPADDB dst, nds, m (VEX/EVEX.66.0F FC /r): packed byte addition with memory operand.
// EVEX full-vector-mem tuple; element size not encoded in disp8*N (byte ops).
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFC);
  emit_operand(dst, src);
}
5954 
// VPADDW dst, nds, m (VEX/EVEX.66.0F FD /r): packed word addition with memory operand.
void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFD);
  emit_operand(dst, src);
}
5964 
// VPADDD dst, nds, m (VEX/EVEX.66.0F FE /r): packed dword addition with memory operand.
// EVEX full-vector tuple, 32-bit element size for disp8*N compression.
void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_operand(dst, src);
}
5974 
// VPADDQ dst, nds, m (VEX/EVEX.66.0F D4 /r): packed qword addition with memory operand.
// EVEX full-vector tuple, 64-bit element size for disp8*N compression.
void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD4);
  emit_operand(dst, src);
}
5985 
5986 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
5987   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5988   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5989   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5990   emit_int8((unsigned char)0xF8);
5991   emit_int8((unsigned char)(0xC0 | encode));
5992 }
5993 
5994 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
5995   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5996   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5997   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5998   emit_int8((unsigned char)0xF9);
5999   emit_int8((unsigned char)(0xC0 | encode));
6000 }
6001 
6002 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
6003   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6004   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6005   emit_int8((unsigned char)0xFA);
6006   emit_int8((unsigned char)(0xC0 | encode));
6007 }
6008 
6009 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
6010   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6011   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6012   attributes.set_rex_vex_w_reverted();
6013   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6014   emit_int8((unsigned char)0xFB);
6015   emit_int8((unsigned char)(0xC0 | encode));
6016 }
6017 
6018 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6019   assert(UseAVX > 0, "requires some form of AVX");
6020   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6021   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6022   emit_int8((unsigned char)0xF8);
6023   emit_int8((unsigned char)(0xC0 | encode));
6024 }
6025 
6026 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6027   assert(UseAVX > 0, "requires some form of AVX");
6028   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6029   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6030   emit_int8((unsigned char)0xF9);
6031   emit_int8((unsigned char)(0xC0 | encode));
6032 }
6033 
6034 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6035   assert(UseAVX > 0, "requires some form of AVX");
6036   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6037   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6038   emit_int8((unsigned char)0xFA);
6039   emit_int8((unsigned char)(0xC0 | encode));
6040 }
6041 
// VPSUBQ dst, nds, src (VEX/EVEX.66.0F FB /r): packed qword subtraction, 3-operand.
void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFB);
  emit_int8((unsigned char)(0xC0 | encode));
}
6050 
// VPSUBB dst, nds, m (VEX/EVEX.66.0F F8 /r): packed byte subtraction with memory operand.
void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF8);
  emit_operand(dst, src);
}
6060 
// VPSUBW dst, nds, m (VEX/EVEX.66.0F F9 /r): packed word subtraction with memory operand.
void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF9);
  emit_operand(dst, src);
}
6070 
// VPSUBD dst, nds, m (VEX/EVEX.66.0F FA /r): packed dword subtraction with memory operand.
void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFA);
  emit_operand(dst, src);
}
6080 
// VPSUBQ dst, nds, m (VEX/EVEX.66.0F FB /r): packed qword subtraction with memory operand.
void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFB);
  emit_operand(dst, src);
}
6091 
6092 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
6093   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6094   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6095   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6096   emit_int8((unsigned char)0xD5);
6097   emit_int8((unsigned char)(0xC0 | encode));
6098 }
6099 
6100 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
6101   assert(VM_Version::supports_sse4_1(), "");
6102   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6103   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6104   emit_int8(0x40);
6105   emit_int8((unsigned char)(0xC0 | encode));
6106 }
6107 
6108 void Assembler::pmuludq(XMMRegister dst, XMMRegister src) {
6109   assert(VM_Version::supports_sse2(), "");
6110   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6111   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6112   emit_int8((unsigned char)(0xF4));
6113   emit_int8((unsigned char)(0xC0 | encode));
6114 }
6115 
6116 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6117   assert(UseAVX > 0, "requires some form of AVX");
6118   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6119   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6120   emit_int8((unsigned char)0xD5);
6121   emit_int8((unsigned char)(0xC0 | encode));
6122 }
6123 
6124 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6125   assert(UseAVX > 0, "requires some form of AVX");
6126   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6127   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6128   emit_int8(0x40);
6129   emit_int8((unsigned char)(0xC0 | encode));
6130 }
6131 
// VPMULLQ dst, nds, src (EVEX.66.0F38.W1 40 /r): packed qword multiply, low halves.
// EVEX-only instruction (AVX-512DQ); hence is_evex is forced and UseAVX > 2 is required.
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_int8((unsigned char)(0xC0 | encode));
}
6140 
// VPMULUDQ dst, nds, src (VEX/EVEX.66.0F F4 /r): unsigned low-dword multiply -> qwords.
// NOTE(review): sets vex_w from supports_evex() but, unlike vpaddq/vpsubq, does not
// call set_rex_vex_w_reverted() — confirm this asymmetry is intentional.
void Assembler::vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)(0xF4));
  emit_int8((unsigned char)(0xC0 | encode));
}
6148 
// VPMULLW dst, nds, m (VEX/EVEX.66.0F D5 /r): packed word multiply with memory operand.
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD5);
  emit_operand(dst, src);
}
6158 
// VPMULLD dst, nds, m (VEX/EVEX.66.0F38 40 /r): packed dword multiply with memory operand.
void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_operand(dst, src);
}
6168 
// VPMULLQ dst, nds, m (EVEX.66.0F38.W1 40 /r): packed qword multiply with memory operand.
// EVEX-only (AVX-512DQ); 64-bit element tuple for disp8*N compression.
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionMark im(this);  // marks instruction start for relocation of the address operand
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_is_evex_instruction();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_operand(dst, src);
}
6179 
6180 // Min, max
6181 void Assembler::pminsb(XMMRegister dst, XMMRegister src) {
6182   assert(VM_Version::supports_sse4_1(), "");
6183   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
6184   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6185   emit_int8(0x38);
6186   emit_int8((unsigned char)(0xC0 | encode));
6187 }
6188 
6189 void Assembler::vpminsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6190   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6191         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6192   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
6193   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6194   emit_int8(0x39);
6195   emit_int8((unsigned char)(0xC0 | encode));
6196 }
6197 
6198 void Assembler::pminsw(XMMRegister dst, XMMRegister src) {
6199   assert(VM_Version::supports_sse2(), "");
6200   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
6201   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6202   emit_int8((unsigned char)0xEA);
6203   emit_int8((unsigned char)(0xC0 | encode));
6204 }
6205 
6206 void Assembler::vpminsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6207   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6208         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6209   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
6210   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6211   emit_int8((unsigned char)0xEA);
6212   emit_int8((unsigned char)(0xC0 | encode));
6213 }
6214 
// PMINSD xmm, xmm (66 0F38 39 /r): packed signed dword minimum (SSE4.1).
// NOTE(review): _legacy_mode_bw is the byte/word AVX-512BW gate, but PMINSD is a
// dword op whose EVEX form needs only AVX-512F — confirm this flag is intentional.
void Assembler::pminsd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x39);
  emit_int8((unsigned char)(0xC0 | encode));
}
6222 
// Emit VPMINSD (VEX/EVEX.66.0F38.39 /r): packed signed 32-bit minimum, dst = min(nds, src).
// 512-bit form only needs EVEX (AVX512F) since dword ops are in the foundation set.
void Assembler::vpminsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x39);
  emit_int8((unsigned char)(0xC0 | encode));
}
6231 
// Emit VPMINSQ (EVEX.66.0F38.W1 39 /r): packed signed 64-bit minimum, dst = min(nds, src).
// Quadword form exists only under AVX-512, hence the UseAVX > 2 requirement and vex_w = 1.
void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires AVX512F");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x39);
  emit_int8((unsigned char)(0xC0 | encode));
}
6239 
// Emit MINPS xmm,xmm (SSE, 0F 5D /r, no prefix): packed single-precision FP minimum.
void Assembler::minps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5D);
  emit_int8((unsigned char)(0xC0 | encode));
}
// Emit VMINPS (VEX/EVEX.0F.5D /r): packed single-precision FP minimum, dst = min(nds, src).
void Assembler::vminps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len >= AVX_512bit ? VM_Version::supports_avx512vl() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5D);
  emit_int8((unsigned char)(0xC0 | encode));
}
6254 
// Emit MINPD xmm,xmm (SSE2, 66 0F 5D /r): packed double-precision FP minimum.
void Assembler::minpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5D);
  emit_int8((unsigned char)(0xC0 | encode));
}
// Emit VMINPD (VEX/EVEX.66.0F.W1 5D /r): packed double-precision FP minimum, dst = min(nds, src).
// NOTE(review): the 512-bit case checks supports_avx512bw(), but this is an FP op — the sibling
// vminps checks avx512vl() and vmaxpd checks evex(); confirm which predicate is intended here.
void Assembler::vminpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5D);
  emit_int8((unsigned char)(0xC0 | encode));
}
6270 
// Emit PMAXSB xmm,xmm (SSE4.1, 66 0F 38 3C /r): packed signed 8-bit maximum, dst = max(dst, src).
void Assembler::pmaxsb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x3C);
  emit_int8((unsigned char)(0xC0 | encode));
}
6278 
// Emit VPMAXSB (VEX/EVEX.66.0F38.3C /r): packed signed 8-bit maximum, dst = max(nds, src).
// 512-bit form needs AVX512BW (byte element size).
void Assembler::vpmaxsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x3C);
  emit_int8((unsigned char)(0xC0 | encode));
}
6287 
// Emit PMAXSW xmm,xmm (SSE2, 66 0F EE /r): packed signed 16-bit maximum, dst = max(dst, src).
void Assembler::pmaxsw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEE);
  emit_int8((unsigned char)(0xC0 | encode));
}
6295 
// Emit VPMAXSW (VEX/EVEX.66.0F.EE /r): packed signed 16-bit maximum, dst = max(nds, src).
// 512-bit form needs AVX512BW (word element size).
void Assembler::vpmaxsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEE);
  emit_int8((unsigned char)(0xC0 | encode));
}
6304 
// Emit PMAXSD xmm,xmm (SSE4.1, 66 0F 38 3D /r): packed signed 32-bit maximum, dst = max(dst, src).
// NOTE(review): legacy_mode is _legacy_mode_bw although this is a dword op (vpmaxsd passes true) — confirm intended.
void Assembler::pmaxsd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x3D);
  emit_int8((unsigned char)(0xC0 | encode));
}
6312 
// Emit VPMAXSD (VEX/EVEX.66.0F38.3D /r): packed signed 32-bit maximum, dst = max(nds, src).
// 512-bit form only needs EVEX (AVX512F) since dword ops are in the foundation set.
void Assembler::vpmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x3D);
  emit_int8((unsigned char)(0xC0 | encode));
}
6321 
// Emit VPMAXSQ (EVEX.66.0F38.W1 3D /r): packed signed 64-bit maximum, dst = max(nds, src).
// Quadword form exists only under AVX-512, hence the UseAVX > 2 requirement and vex_w = 1.
void Assembler::vpmaxsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires AVX512F");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x3D);
  emit_int8((unsigned char)(0xC0 | encode));
}
6329 
// Emit MAXPS xmm,xmm (SSE, 0F 5F /r, no prefix): packed single-precision FP maximum.
void Assembler::maxps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5F);
  emit_int8((unsigned char)(0xC0 | encode));
}
6337 
// Emit VMAXPS (VEX/EVEX.0F.5F /r): packed single-precision FP maximum, dst = max(nds, src).
void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5F);
  emit_int8((unsigned char)(0xC0 | encode));
}
6345 
// Emit MAXPD xmm,xmm (SSE2, 66 0F 5F /r): packed double-precision FP maximum.
void Assembler::maxpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5F);
  emit_int8((unsigned char)(0xC0 | encode));
}
6353 
// Emit VMAXPD (VEX/EVEX.66.0F.W1 5F /r): packed double-precision FP maximum, dst = max(nds, src).
void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5F);
  emit_int8((unsigned char)(0xC0 | encode));
}
6361 
6362 // Shift packed integers left by specified number of bits.
// Emit PSLLW xmm,imm8 (SSE2, 66 0F 71 /6 ib): shift packed 16-bit integers left by an immediate.
// The /6 opcode extension is carried in ModRM.reg, which is why xmm6 is passed as the "reg" operand.
void Assembler::psllw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6372 
// Emit PSLLD xmm,imm8 (SSE2, 66 0F 72 /6 ib): shift packed 32-bit integers left by an immediate.
void Assembler::pslld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6382 
// Emit PSLLQ xmm,imm8 (SSE2, 66 0F 73 /6 ib): shift packed 64-bit integers left by an immediate.
void Assembler::psllq(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6392 
// Emit PSLLW xmm,xmm (SSE2, 66 0F F1 /r): shift packed 16-bit integers left by the count in 'shift'.
void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF1);
  emit_int8((unsigned char)(0xC0 | encode));
}
6400 
// Emit PSLLD xmm,xmm (SSE2, 66 0F F2 /r): shift packed 32-bit integers left by the count in 'shift'.
void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}
6408 
// Emit PSLLQ xmm,xmm (SSE2, 66 0F F3 /r): shift packed 64-bit integers left by the count in 'shift'.
void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
6417 
// Emit VPSLLW (VEX/EVEX.66.0F.71 /6 ib): shift packed 16-bit integers in src left by imm8 into dst.
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6427 
// Emit VPSLLD (VEX/EVEX.66.0F.72 /6 ib): shift packed 32-bit integers in src left by imm8 into dst.
// NOTE(review): the NOT_LP64 sse2 assert looks like a leftover from the non-AVX form — the
// UseAVX > 0 check above already implies it; harmless but redundant.
void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6438 
// Emit VPSLLQ (VEX/EVEX.66.0F.73 /6 ib): shift packed 64-bit integers in src left by imm8 into dst.
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6449 
// Emit VPSLLW (VEX/EVEX.66.0F.F1 /r): shift packed 16-bit integers in src left by the count in 'shift'.
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF1);
  emit_int8((unsigned char)(0xC0 | encode));
}
6457 
// Emit VPSLLD (VEX/EVEX.66.0F.F2 /r): shift packed 32-bit integers in src left by the count in 'shift'.
void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}
6465 
// Emit VPSLLQ (VEX/EVEX.66.0F.F3 /r): shift packed 64-bit integers in src left by the count in 'shift'.
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
6474 
6475 // Shift packed integers logically right by specified number of bits.
// Emit PSRLW xmm,imm8 (SSE2, 66 0F 71 /2 ib): logical right shift of packed 16-bit integers by imm8.
// The /2 opcode extension is carried in ModRM.reg, which is why xmm2 is passed as the "reg" operand.
void Assembler::psrlw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6485 
// Emit PSRLD xmm,imm8 (SSE2, 66 0F 72 /2 ib): logical right shift of packed 32-bit integers by imm8.
void Assembler::psrld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6495 
// Emit PSRLQ xmm,imm8 (SSE2, 66 0F 73 /2 ib): logical right shift of packed 64-bit integers by imm8.
void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse it with psrldq SSE2 instruction which
  // shifts 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6508 
// Emit PSRLW xmm,xmm (SSE2, 66 0F D1 /r): logical right shift of packed 16-bit integers by the count in 'shift'.
void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD1);
  emit_int8((unsigned char)(0xC0 | encode));
}
6516 
// Emit PSRLD xmm,xmm (SSE2, 66 0F D2 /r): logical right shift of packed 32-bit integers by the count in 'shift'.
void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD2);
  emit_int8((unsigned char)(0xC0 | encode));
}
6524 
// Emit PSRLQ xmm,xmm (SSE2, 66 0F D3 /r): logical right shift of packed 64-bit integers by the count in 'shift'.
void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xC0 | encode));
}
6533 
// Emit VPSRLW (VEX/EVEX.66.0F.71 /2 ib): logical right shift of packed 16-bit integers in src by imm8 into dst.
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6543 
// Emit VPSRLD (VEX/EVEX.66.0F.72 /2 ib): logical right shift of packed 32-bit integers in src by imm8 into dst.
void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6553 
// Emit VPSRLQ (VEX/EVEX.66.0F.73 /2 ib): logical right shift of packed 64-bit integers in src by imm8 into dst.
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6564 
// Emit VPSRLW (VEX/EVEX.66.0F.D1 /r): logical right shift of packed 16-bit integers in src by the count in 'shift'.
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD1);
  emit_int8((unsigned char)(0xC0 | encode));
}
6572 
// Emit VPSRLD (VEX/EVEX.66.0F.D2 /r): logical right shift of packed 32-bit integers in src by the count in 'shift'.
void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD2);
  emit_int8((unsigned char)(0xC0 | encode));
}
6580 
// Emit VPSRLQ (VEX/EVEX.66.0F.D3 /r): logical right shift of packed 64-bit integers in src by the count in 'shift'.
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xC0 | encode));
}
6589 
6590 // Shift packed integers arithmetically right by specified number of bits.
// Emit PSRAW xmm,imm8 (SSE2, 66 0F 71 /4 ib): arithmetic right shift of packed 16-bit integers by imm8.
// The /4 opcode extension is carried in ModRM.reg, which is why xmm4 is passed as the "reg" operand.
void Assembler::psraw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6600 
// Emit PSRAD xmm,imm8 (SSE2, 66 0F 72 /4 ib): arithmetic right shift of packed 32-bit integers by imm8.
void Assembler::psrad(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6610 
// Emit PSRAW xmm,xmm (SSE2, 66 0F E1 /r): arithmetic right shift of packed 16-bit integers by the count in 'shift'.
void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE1);
  emit_int8((unsigned char)(0xC0 | encode));
}
6618 
// Emit PSRAD xmm,xmm (SSE2, 66 0F E2 /r): arithmetic right shift of packed 32-bit integers by the count in 'shift'.
void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE2);
  emit_int8((unsigned char)(0xC0 | encode));
}
6626 
// Emit VPSRAW (VEX/EVEX.66.0F.71 /4 ib): arithmetic right shift of packed 16-bit integers in src by imm8 into dst.
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6636 
// Emit VPSRAD (VEX/EVEX.66.0F.72 /4 ib): arithmetic right shift of packed 32-bit integers in src by imm8 into dst.
void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib  (comment previously said 0F 71; the code emits 0x72)
  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF); // imm8 shift count
}
6646 
// Emit VPSRAW (VEX/EVEX.66.0F.E1 /r): arithmetic right shift of packed 16-bit integers in src by the count in 'shift'.
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE1);
  emit_int8((unsigned char)(0xC0 | encode));
}
6654 
// Emit VPSRAD (VEX/EVEX.66.0F.E2 /r): arithmetic right shift of packed 32-bit integers in src by the count in 'shift'.
void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE2);
  emit_int8((unsigned char)(0xC0 | encode));
}
6662 
6663 //Variable Shift packed integers logically left.
// Emit VPSLLVD (VEX/EVEX.66.0F38.W0 47 /r): per-element variable left shift of packed 32-bit integers.
void Assembler::vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 1, "requires AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x47);
  emit_int8((unsigned char)(0xC0 | encode));
}
6671 
// Emit VPSLLVQ (VEX/EVEX.66.0F38.W1 47 /r): per-element variable left shift of packed 64-bit integers.
void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 1, "requires AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x47);
  emit_int8((unsigned char)(0xC0 | encode));
}
6679 
6680 //Variable Shift packed integers logically right.
// Emit VPSRLVD (VEX/EVEX.66.0F38.W0 45 /r): per-element variable logical right shift of packed 32-bit integers.
void Assembler::vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 1, "requires AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x45);
  emit_int8((unsigned char)(0xC0 | encode));
}
6688 
// Emit VPSRLVQ (VEX/EVEX.66.0F38.W1 45 /r): per-element variable logical right shift of packed 64-bit integers.
void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 1, "requires AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x45);
  emit_int8((unsigned char)(0xC0 | encode));
}
6696 
6697 //Variable right Shift arithmetic packed integers .
// Emit VPSRAVD (VEX/EVEX.66.0F38.W0 46 /r): per-element variable arithmetic right shift of packed 32-bit integers.
void Assembler::vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 1, "requires AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x46);
  emit_int8((unsigned char)(0xC0 | encode));
}
6705 
// Emit VPSRAVQ (EVEX.66.0F38.W1 46 /r): per-element variable arithmetic right shift of packed 64-bit integers.
// NOTE(review): per the Intel SDM the quadword form exists only under AVX-512 (EVEX), yet the
// assert only requires AVX2 (UseAVX > 1) — confirm callers guarantee AVX-512 is available.
void Assembler::vpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 1, "requires AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x46);
  emit_int8((unsigned char)(0xC0 | encode));
}
6713 
6714 // logical operations packed integers
// Emit PAND xmm,xmm (SSE2, 66 0F DB /r): bitwise AND of the full 128-bit registers, dst &= src.
void Assembler::pand(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);
  emit_int8((unsigned char)(0xC0 | encode));
}
6722 
// Emit VPAND (VEX/EVEX.66.0F.DB /r), register form: bitwise AND, dst = nds & src.
void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);
  emit_int8((unsigned char)(0xC0 | encode));
}
6730 
// Emit VPAND (VEX/EVEX.66.0F.DB /r), memory-operand form: bitwise AND, dst = nds & [src].
// The EVEX_FV/32-bit address attributes drive compressed displacement (disp8*N) scaling under EVEX.
void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);
  emit_operand(dst, src); // emits ModRM + SIB/displacement for the memory operand
}
6740 
// Emit VPANDD (EVEX.NDS.66.0F.W0 DB /r) with an explicit opmask register: masked dword bitwise AND.
// 'merge' selects merging-masking (keep dst elements where mask bit is 0) instead of zeroing-masking.
void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    // Merging-masking: clear the EVEX.z (zeroing) context so unmasked lanes keep dst's old values.
    attributes.reset_is_clear_context();
  }
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);
  emit_int8((unsigned char)(0xC0 | encode));
}
6754 
// VPANDQ: quadword-element AND (EVEX.W1 when EVEX is available).
void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // Allow dropping W back to 0 if the instruction ends up VEX-encoded.
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6763 
// PANDN xmm, xmm (66 0F DF /r): dst = ~dst & src (AND-NOT of packed integers).
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDF);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6772 
// POR xmm, xmm (66 0F EB /r): bitwise OR of packed integers, dst |= src.
void Assembler::por(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEB);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6780 
// VPOR dst, nds, src: AVX/EVEX three-operand bitwise OR of packed integers.
void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEB);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6788 
// VPOR dst, nds, [mem]: memory-operand variant of the three-operand packed OR.
void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // Full-vector tuple with 32-bit elements controls EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEB);  // opcode
  emit_operand(dst, src);          // ModRM + SIB/displacement for the memory operand
}
6798 
// VPORQ: quadword-element OR (EVEX.W1 when EVEX is available).
void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // Allow dropping W back to 0 if the instruction ends up VEX-encoded.
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEB);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6807 
// EVEX-masked VPORD: packed dword OR under opmask `mask`; `merge` selects
// merge-masking (keep unmasked dst lanes) instead of the zeroing context.
void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attributes.reset_is_clear_context();  // merge-masking: do not zero unmasked elements
  }
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEB);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6821 
6822 void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
6823   assert(VM_Version::supports_evex(), "");
6824   // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
6825   InstructionMark im(this);
6826   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6827   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
6828   attributes.set_is_evex_instruction();
6829   attributes.set_embedded_opmask_register_specifier(mask);
6830   if (merge) {
6831     attributes.reset_is_clear_context();
6832   }
6833   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6834   emit_int8((unsigned char)0xEB);
6835   emit_operand(dst, src);
6836 }
6837 
// PXOR xmm, xmm (66 0F EF /r): bitwise XOR of packed integers, dst ^= src.
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEF);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6845 
// VPXOR dst, nds, src: AVX/EVEX three-operand bitwise XOR of packed integers.
void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEF);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6853 
// VPXOR dst, nds, [mem]: memory-operand variant of the three-operand packed XOR.
void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // Full-vector tuple with 32-bit elements controls EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEF);  // opcode
  emit_operand(dst, src);          // ModRM + SIB/displacement for the memory operand
}
6863 
// VPXORQ: quadword-element XOR (EVEX.W1 when EVEX is available).
void Assembler::vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // Allow dropping W back to 0 if the instruction ends up VEX-encoded.
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEF);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6872 
// EVEX-masked VPXORD: packed dword XOR under opmask `mask`; `merge` selects
// merge-masking (keep unmasked dst lanes) instead of the zeroing context.
void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // Encoding: EVEX.NDS.XXX.66.0F.W0 EF /r
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attributes.reset_is_clear_context();  // merge-masking: do not zero unmasked elements
  }
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEF);             // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
6886 
6887 // vinserti forms
6888 
// VINSERTI128 (66 0F3A 38 /r ib): insert a 128-bit integer lane of src into dst.
// On AVX-512-without-VL hardware the 512-bit encoding is used instead of 256-bit.
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx2(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x38);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(imm8 & 0x01);
}
6901 
// VINSERTI128 with a memory source: insert 128 integer bits loaded from src.
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_avx2(), "");
  assert(dst != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  // Tuple4 of 32-bit elements controls EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x38);
  emit_operand(dst, src);
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(imm8 & 0x01);
}
6917 
// VINSERTI32X4 (EVEX 66 0F3A 38 /r ib): insert a 128-bit (4 x dword) lane into
// a 512-bit destination; imm8 selects which quarter.
void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x38);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - insert into q0 128 bits (0..127)
  // 0x01 - insert into q1 128 bits (128..255)
  // 0x02 - insert into q2 128 bits (256..383)
  // 0x03 - insert into q3 128 bits (384..511)
  emit_int8(imm8 & 0x03);
}
6931 
6932 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6933   assert(VM_Version::supports_avx(), "");
6934   assert(dst != xnoreg, "sanity");
6935   assert(imm8 <= 0x03, "imm8: %u", imm8);
6936   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6937   InstructionMark im(this);
6938   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6939   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6940   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6941   emit_int8(0x18);
6942   emit_operand(dst, src);
6943   // 0x00 - insert into q0 128 bits (0..127)
6944   // 0x01 - insert into q1 128 bits (128..255)
6945   // 0x02 - insert into q2 128 bits (256..383)
6946   // 0x03 - insert into q3 128 bits (384..511)
6947   emit_int8(imm8 & 0x03);
6948 }
6949 
// VINSERTI64X4 (EVEX.W1 66 0F3A 38 /r ib): insert a 256-bit integer lane into
// a 512-bit destination; imm8 selects lower/upper half.
void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x38);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - insert into lower 256 bits
  // 0x01 - insert into upper 256 bits
  emit_int8(imm8 & 0x01);
}
6961 
6962 
6963 // vinsertf forms
6964 
// VINSERTF128 (66 0F3A 18 /r ib): insert a 128-bit float lane of src into dst.
// On AVX-512-without-VL hardware the 512-bit encoding is used instead of 256-bit.
void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x18);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(imm8 & 0x01);
}
6977 
// VINSERTF128 with a memory source: insert 128 float bits loaded from src.
void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(dst != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  // Tuple4 of 32-bit elements controls EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x18);
  emit_operand(dst, src);
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(imm8 & 0x01);
}
6993 
// VINSERTF32X4 (EVEX 66 0F3A 18 /r ib): insert a 128-bit (4 x float) lane into
// a 512-bit destination; imm8 selects which quarter.
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x18);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - insert into q0 128 bits (0..127)
  // 0x01 - insert into q1 128 bits (128..255)
  // 0x02 - insert into q2 128 bits (256..383)
  // 0x03 - insert into q3 128 bits (384..511)
  emit_int8(imm8 & 0x03);
}
7007 
// VINSERTF32X4 with a memory source; falls back to a 256-bit (vinsertf128)
// encoding when EVEX is unavailable.
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(dst != xnoreg, "sanity");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  // Tuple4 of 32-bit elements controls EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x18);
  emit_operand(dst, src);
  // 0x00 - insert into q0 128 bits (0..127)
  // 0x01 - insert into q1 128 bits (128..255)
  // 0x02 - insert into q2 128 bits (256..383)
  // 0x03 - insert into q3 128 bits (384..511)
  emit_int8(imm8 & 0x03);
}
7025 
// VINSERTF64X4 (EVEX.W1 66 0F3A 1A /r ib): insert a 256-bit float lane into a
// 512-bit destination; imm8 selects lower/upper half.
void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x1A);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - insert into lower 256 bits
  // 0x01 - insert into upper 256 bits
  emit_int8(imm8 & 0x01);
}
7037 
// VINSERTF64X4 with a memory source: insert 256 bits loaded from src.
void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionMark im(this);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  // Tuple4 of 64-bit elements controls EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x1A);
  emit_operand(dst, src);
  // 0x00 - insert into lower 256 bits
  // 0x01 - insert into upper 256 bits
  emit_int8(imm8 & 0x01);
}
7052 
7053 
7054 // vextracti forms
7055 
7056 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7057   assert(VM_Version::supports_avx(), "");
7058   assert(imm8 <= 0x01, "imm8: %u", imm8);
7059   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
7060   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7061   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7062   emit_int8(0x39);
7063   emit_int8((unsigned char)(0xC0 | encode));
7064   // 0x00 - extract from lower 128 bits
7065   // 0x01 - extract from upper 128 bits
7066   emit_int8(imm8 & 0x01);
7067 }
7068 
// VEXTRACTI128 to memory: store a 128-bit integer lane of src at dst.
void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx2(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x39);
  emit_operand(src, dst);  // src is the register field, dst the memory operand
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
  emit_int8(imm8 & 0x01);
}
7085 
// VEXTRACTI32X4 (EVEX 66 0F3A 39 /r ib): extract a 128-bit (4 x dword) lane;
// falls back to a 256-bit (vextracti128-style) encoding without EVEX.
// NOTE(review): the AVX_256bit fallback emits the VEX-encoded 0x39 form, which
// requires AVX2 — the supports_avx() assert may be weaker than needed; confirm
// against callers.
void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x39);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
7100 
// VEXTRACTI32X4 to memory: store a 128-bit (4 x dword) lane of src at dst.
void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionMark im(this);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  // Tuple4 of 32-bit elements controls EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x39);
  emit_operand(src, dst);  // src is the register field, dst the memory operand
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
7118 
// VEXTRACTI64X2 (EVEX.W1 66 0F3A 39 /r ib, AVX512DQ): extract a 128-bit
// (2 x qword) lane from a 512-bit source; imm8 selects which quarter.
void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx512dq(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x39);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
7132 
// VEXTRACTI64X4 (EVEX.W1 66 0F3A 3B /r ib): extract a 256-bit integer lane from
// a 512-bit source; imm8 selects lower/upper half.
void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3B);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from lower 256 bits
  // 0x01 - extract from upper 256 bits
  emit_int8(imm8 & 0x01);
}
7144 
7145 
7146 // vextractf forms
7147 
// VEXTRACTF128 (66 0F3A 19 /r ib): extract a 128-bit float lane of src into dst.
// On AVX-512-without-VL hardware the 512-bit encoding is used instead of 256-bit.
void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
  emit_int8(imm8 & 0x01);
}
7160 
// VEXTRACTF128 to memory: store a 128-bit float lane of src at dst.
void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);
  emit_operand(src, dst);  // src is the register field, dst the memory operand
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
  emit_int8(imm8 & 0x01);
}
7177 
// VEXTRACTF32X4 (EVEX 66 0F3A 19 /r ib): extract a 128-bit (4 x float) lane;
// falls back to a 256-bit (vextractf128-style) encoding without EVEX.
void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
7192 
// VEXTRACTF32X4 to memory: store a 128-bit (4 x float) lane of src at dst.
void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionMark im(this);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  // Tuple4 of 32-bit elements controls EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);
  emit_operand(src, dst);  // src is the register field, dst the memory operand
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
7210 
// VEXTRACTF64X2 (EVEX.W1 66 0F3A 19 /r ib, AVX512DQ): extract a 128-bit
// (2 x double) lane from a 512-bit source; imm8 selects which quarter.
void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx512dq(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
7224 
// VEXTRACTF64X4 (EVEX.W1 66 0F3A 1B /r ib): extract a 256-bit float lane from
// a 512-bit source; imm8 selects lower/upper half.
void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x1B);
  emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from lower 256 bits
  // 0x01 - extract from upper 256 bits
  emit_int8(imm8 & 0x01);
}
7236 
// VEXTRACTF64X4 to memory: store a 256-bit lane of src at dst.
void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionMark im(this);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  // Tuple4 of 64-bit elements controls EVEX disp8*N compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x1B);
  emit_operand(src, dst);  // src is the register field, dst the memory operand
  // 0x00 - extract from lower 256 bits
  // 0x01 - extract from upper 256 bits
  emit_int8(imm8 & 0x01);
}
7252 
7253 
7254 // legacy word/dword replicate
// VPBROADCASTW (AVX2, 66 0F38 79 /r): replicate the low word of src across dst.
void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x79);
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
7262 
// VPBROADCASTD (AVX2, 66 0F38 58 /r): replicate the low dword of src across dst.
void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
7270 
7271 
7272 // xmm/mem sourced byte/word/dword/qword replicate
7273 
7274 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
// VPBROADCASTB (66 0F38 78 /r): replicate the low byte of src across dst.
void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x78);
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
7282 
// VPBROADCASTB from memory: replicate one byte loaded from src across dst.
void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // Tuple1-scalar, 8-bit element: disp8*N compression scales by 1.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x78);
  emit_operand(dst, src);
}
7294 
7295 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
// VPBROADCASTW (66 0F38 79 /r): replicate the low word of src across dst.
void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x79);
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
7303 
// VPBROADCASTW from memory: replicate one word loaded from src across dst.
void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // Tuple1-scalar, 16-bit element: disp8*N compression scales by 2.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x79);
  emit_operand(dst, src);
}
7315 
7316 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
// VPBROADCASTD (66 0F38 58 /r): replicate the low dword of src across dst.
void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM, register-direct form
}
7324 
// VPBROADCASTD from memory: replicate one dword loaded from src across dst.
void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // Tuple1-scalar, 32-bit element: disp8*N compression scales by 4.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
7336 
// duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // VPBROADCASTQ: 66 0F38 59 /r, W1 on EVEX; W bit reverted when demoted to VEX
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
7346 
// memory-source form of VPBROADCASTQ (66 0F38 59 /r): tuple T1S, 64-bit element
void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
7359 
7360 
7361 // scalar single/double precision replicate
7362 
// duplicate single precision data from src into programmed locations in dest : requires AVX512VL
void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // VBROADCASTSS: 66 0F38 18 /r, W0
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x18);
  emit_int8((unsigned char)(0xC0 | encode));
}
7371 
// memory-source form of VBROADCASTSS (66 0F38 18 /r): tuple T1S, 32-bit element
void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x18);
  emit_operand(dst, src);
}
7383 
// duplicate double precision data from src into programmed locations in dest : requires AVX512VL
void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // VBROADCASTSD: 66 0F38 19 /r, W1 on EVEX; W bit reverted when demoted to VEX
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x19);
  emit_int8((unsigned char)(0xC0 | encode));
}
7393 
// memory-source form of VBROADCASTSD (66 0F38 19 /r): tuple T1S, 64-bit element
void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x19);
  emit_operand(dst, src);
}
7406 
7407 
7408 // gpr source broadcast forms
7409 
// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // EVEX-only VPBROADCASTB from a GPR: 66 0F38 7A /r
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x7A);
  emit_int8((unsigned char)(0xC0 | encode));
}
7419 
// duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // EVEX-only VPBROADCASTW from a GPR: 66 0F38 7B /r
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x7B);
  emit_int8((unsigned char)(0xC0 | encode));
}
7429 
// duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // EVEX-only VPBROADCASTD from a GPR: 66 0F38 7C /r, W0
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x7C);
  emit_int8((unsigned char)(0xC0 | encode));
}
7439 
// duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // EVEX-only VPBROADCASTQ from a GPR: same 0x7C opcode as the dword form,
  // distinguished by W1 (vex_w true selects the 64-bit variant)
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x7C);
  emit_int8((unsigned char)(0xC0 | encode));
}
7449 
7450 
// Carry-Less Multiplication Quadword
// PCLMULQDQ xmm1, xmm2, imm8 (66 0F3A 44 /r ib); mask selects the source quadwords
void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
  assert(VM_Version::supports_clmul(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x44);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)mask);
}
7460 
// Carry-Less Multiplication Quadword
// Three-operand VEX form: VPCLMULQDQ xmm1, xmm2, xmm3, imm8 (VEX.128.66.0F3A 44 /r ib)
void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
  assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x44);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)mask);
}
7470 
// VZEROUPPER (VEX.128.0F 77): zeroes the upper bits of all YMM/ZMM registers to
// avoid AVX<->SSE transition penalties. No-op when the CPU doesn't support/need it.
void Assembler::vzeroupper() {
  if (VM_Version::supports_vzeroupper()) {
    InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
    (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
    emit_int8(0x77);
  }
}
7478 
7479 #ifndef _LP64
7480 // 32bit only pieces of the assembler
7481 
// CMP r32, imm32 with relocation info for the immediate: 81 /7 id
// (0xF8 | reg == ModRM mod=11, reg field 7)
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_int8((unsigned char)0x81);
  emit_int8((unsigned char)(0xF8 | src1->encoding()));
  emit_data(imm32, rspec, 0);
}
7489 
// CMP m32, imm32 with relocation info for the immediate: 81 /7 id
// (rdi's encoding, 7, supplies the /7 opcode-extension field)
void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32-bit versions of 64-bit regs)
  InstructionMark im(this);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, src1);
  emit_data(imm32, rspec, 0);
}
7497 
// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
// Encoding: CMPXCHG8B m64 = 0F C7 /1 (rcx's encoding, 1, supplies the /1 field).
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC7);
  emit_operand(rcx, adr);
}
7507 
7508 void Assembler::decl(Register dst) {
7509   // Don't use it directly. Use MacroAssembler::decrementl() instead.
7510  emit_int8(0x48 | dst->encoding());
7511 }
7512 
7513 #endif // _LP64
7514 
7515 // 64bit typically doesn't use the x87 but needs to for the trig funcs
7516 
// FABS (D9 E1): ST(0) := |ST(0)|
void Assembler::fabs() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE1);
}
7521 
// FADD with stack operand i: D8 C0+i
void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}
7525 
// FADD double-precision memory operand: DC /0
void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rax, src);
}
7531 
// FADD single-precision memory operand: D8 /0
void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rax, src);
}
7537 
// FADD targeting ST(i): DC C0+i
void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}
7541 
// FADDP (add and pop): DE C0+i
void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}
7545 
// FCHS (D9 E0): ST(0) := -ST(0)
void Assembler::fchs() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE0);
}
7550 
// FCOM ST(i): D8 D0+i
void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}
7554 
// FCOMP ST(i) (compare and pop): D8 D8+i
void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}
7558 
// FCOMP double-precision memory operand: DC /3
void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rbx, src);
}
7564 
// FCOMP single-precision memory operand: D8 /3
void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rbx, src);
}
7570 
// FCOMPP (DE D9): compare ST(0) with ST(1) and pop both
void Assembler::fcompp() {
  emit_int8((unsigned char)0xDE);
  emit_int8((unsigned char)0xD9);
}
7575 
// FCOS (D9 FF): ST(0) := cos(ST(0))
void Assembler::fcos() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFF);
}
7580 
// FDECSTP (D9 F6): decrement the FPU register-stack pointer
void Assembler::fdecstp() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF6);
}
7585 
// FDIV with stack operand i: D8 F0+i
void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}
7589 
// FDIV double-precision memory operand: DC /6
void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rsi, src);
}
7595 
// FDIV single-precision memory operand: D8 /6
void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rsi, src);
}
7601 
// FDIV targeting ST(i): DC F8+i
void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}
7605 
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
//       is erroneous for some of the floating-point instructions below.

// FDIVP: DE F8+i
void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}
7612 
// FDIVR (reversed divide) with stack operand i: D8 F8+i
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}
7616 
// FDIVR double-precision memory operand: DC /7
void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rdi, src);
}
7622 
// FDIVR single-precision memory operand: D8 /7
void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rdi, src);
}
7628 
// FDIVR targeting ST(i): DC F0+i
void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}
7632 
// FDIVRP: DE F0+i
void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}
7636 
// FFREE ST(i) (mark register empty): DD C0+i
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}
7640 
// FILD 64-bit integer memory operand: DF /5
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDF);
  emit_operand32(rbp, adr);
}
7646 
// FILD 32-bit integer memory operand: DB /0
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rax, adr);
}
7652 
// FINCSTP (D9 F7): increment the FPU register-stack pointer
void Assembler::fincstp() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF7);
}
7657 
// FINIT: emits 9B (FWAIT) followed by DB E3 (FNINIT) — the checked init form
void Assembler::finit() {
  emit_int8((unsigned char)0x9B);
  emit_int8((unsigned char)0xDB);
  emit_int8((unsigned char)0xE3);
}
7663 
// FIST 32-bit integer memory operand: DB /2
void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rdx, adr);
}
7669 
// FISTP 64-bit integer memory operand (store and pop): DF /7
void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDF);
  emit_operand32(rdi, adr);
}
7675 
// FISTP 32-bit integer memory operand (store and pop): DB /3
void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rbx, adr);
}
7681 
// FLD1 (D9 E8): push +1.0 onto the FPU stack
void Assembler::fld1() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE8);
}
7686 
// FLD double-precision memory operand: DD /0
void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rax, adr);
}
7692 
// FLD single-precision memory operand: D9 /0
void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rax, adr);
}
7698 
7699 
// FLD ST(index): D9 C0+index
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}
7703 
// FLD 80-bit extended-precision memory operand: DB /5
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rbp, adr);
}
7709 
// FLDCW (load FPU control word): D9 /5
void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rbp, src);
}
7715 
// FLDENV (load FPU environment): D9 /4
void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rsp, src);
}
7721 
// FLDLG2 (D9 EC): push log10(2)
void Assembler::fldlg2() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xEC);
}
7726 
// FLDLN2 (D9 ED): push ln(2)
void Assembler::fldln2() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xED);
}
7731 
// FLDZ (D9 EE): push +0.0
void Assembler::fldz() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xEE);
}
7736 
// Natural log via identity ln(x) = ln(2) * log2(x): FLDLN2; FXCH; FYL2X
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}
7742 
// Base-10 log via identity log10(x) = log10(2) * log2(x): FLDLG2; FXCH; FYL2X
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}
7748 
// FMUL with stack operand i: D8 C8+i
void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}
7752 
// FMUL double-precision memory operand: DC /1
void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rcx, src);
}
7758 
// FMUL single-precision memory operand: D8 /1
void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rcx, src);
}
7764 
// FMUL targeting ST(i): DC C8+i
void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}
7768 
// FMULP (multiply and pop): DE C8+i
void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}
7772 
// FNSAVE (store FPU state, no wait): DD /6
void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rsi, dst);
}
7778 
// Store FPU control word: note this emits a 0x9B FWAIT prefix before D9 /7,
// i.e. the checked FSTCW form despite the "n" in the name.
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0x9B);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rdi, src);
}
7785 
// FNSTSW AX (DF E0): store FPU status word into AX
void Assembler::fnstsw_ax() {
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)0xE0);
}
7790 
// FPREM (D9 F8): partial remainder of ST(0)/ST(1)
void Assembler::fprem() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF8);
}
7795 
// FPREM1 (D9 F5): IEEE partial remainder of ST(0)/ST(1)
void Assembler::fprem1() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF5);
}
7800 
// FRSTOR (restore FPU state): DD /4
void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rsp, src);
}
7806 
// FSIN (D9 FE): ST(0) := sin(ST(0))
void Assembler::fsin() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFE);
}
7811 
// FSQRT (D9 FA): ST(0) := sqrt(ST(0))
void Assembler::fsqrt() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFA);
}
7816 
// FST double-precision memory operand: DD /2
void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rdx, adr);
}
7822 
// FST single-precision memory operand: D9 /2
void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rdx, adr);
}
7828 
// FSTP double-precision memory operand (store and pop): DD /3
void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rbx, adr);
}
7834 
// FSTP ST(index): DD D8+index
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}
7838 
// FSTP single-precision memory operand (store and pop): D9 /3
void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rbx, adr);
}
7844 
// FSTP 80-bit extended-precision memory operand (store and pop): DB /7
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rdi, adr);
}
7850 
// FSUB with stack operand i: D8 E0+i
void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}
7854 
// FSUB double-precision memory operand: DC /4
void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rsp, src);
}
7860 
// FSUB single-precision memory operand: D8 /4
void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rsp, src);
}
7866 
// FSUB targeting ST(i): DC E8+i
void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}
7870 
// FSUBP: DE E8+i
void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}
7874 
// FSUBR (reversed subtract) with stack operand i: D8 E8+i
void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}
7878 
// FSUBR double-precision memory operand: DC /5
void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rbp, src);
}
7884 
// FSUBR single-precision memory operand: D8 /5
void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rbp, src);
}
7890 
// FSUBR targeting ST(i): DC E0+i
void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}
7894 
// FSUBRP: DE E0+i
void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}
7898 
// Tangent: FPTAN (D9 F2) pushes an extra 1.0, which the following
// FSTP ST(0) (DD D8) pops, leaving only tan(ST(0)) on the stack.
void Assembler::ftan() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)0xD8);
}
7905 
// FTST (D9 E4): compare ST(0) with +0.0
void Assembler::ftst() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE4);
}
7910 
// FUCOMI ST(0), ST(i) (unordered compare, sets EFLAGS): DB E8+i
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}
7916 
// FUCOMIP ST(0), ST(i) (unordered compare, set EFLAGS, pop): DF E8+i
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}
7922 
// FWAIT (9B): wait for pending unmasked FPU exceptions
void Assembler::fwait() {
  emit_int8((unsigned char)0x9B);
}
7926 
// FXCH ST(i) (exchange with ST(0)): D9 C8+i
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}
7930 
// FYL2X (D9 F1): ST(1) := ST(1) * log2(ST(0)), pop
void Assembler::fyl2x() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF1);
}
7935 
// FRNDINT (D9 FC): round ST(0) to integer per the current rounding mode
void Assembler::frndint() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFC);
}
7940 
// F2XM1 (D9 F0): ST(0) := 2^ST(0) - 1
void Assembler::f2xm1() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF0);
}
7945 
// FLDL2E (D9 EA): push log2(e)
void Assembler::fldl2e() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xEA);
}
7950 
// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
// Index 0 means "no prefix byte" (nothing is emitted for it).
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
7955 
// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
// Emits, in order: the optional SIMD prefix (66/F3/F2), the REX(.W) prefix
// required by the address/xmm register, then 0F plus the optional 38/3A
// opcode-map escape byte. Used when encoding without VEX (UseAVX == 0).
void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_int8(simd_pre[pre]);
  }
  if (rex_w) {
    prefixq(adr, xreg);
  } else {
    prefix(adr, xreg);
  }
  if (opc > 0) {
    emit_int8(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_int8(opc2);
    }
  }
}
7974 
// Register-register variant of rex_prefix(): emits the optional SIMD prefix,
// a REX(.W) prefix as needed for the two register encodings, and the 0F
// (plus optional 38/3A) escape bytes. Returns the low-3-bit register fields
// packed for the caller's ModRM byte.
int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_int8(simd_pre[pre]);
  }
  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
  if (opc > 0) {
    emit_int8(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_int8(opc2);
    }
  }
  return encode;
}
7989 
7990 
// Emit a VEX prefix from the individual R/X/B bits and the current
// _attributes. Uses the 3-byte form (C4) when any of B, X, W or a 38/3A
// opcode map forces it; otherwise the shorter 2-byte form (C5).
// Note the R/X/B and vvvv fields are stored inverted (one's complement),
// per the VEX specification.
void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
  int vector_len = _attributes->get_vector_len();
  bool vex_w = _attributes->is_rex_vex_w();
  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
    prefix(VEX_3bytes);

    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
    byte1 = (~byte1) & 0xE0;
    byte1 |= opc;
    emit_int8(byte1);

    int byte2 = ((~nds_enc) & 0xf) << 3;
    byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
    emit_int8(byte2);
  } else {
    prefix(VEX_2bytes);

    int byte1 = vex_r ? VEX_R : 0;
    byte1 = (~byte1) & 0x80;
    byte1 |= ((~nds_enc) & 0xf) << 3;
    byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
    emit_int8(byte1);
  }
}
8015 
// This is a 4 byte encoding
// Emits the 62h EVEX prefix plus its three payload bytes (P0, P1, P2) built
// from the supplied extension bits and the current _attributes (W, vector
// length, opmask register, zeroing/merging).
void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
  // EVEX 0x62 prefix
  prefix(EVEX_4bytes);
  bool vex_w = _attributes->is_rex_vex_w();
  int evex_encoding = (vex_w ? VEX_W : 0);
  // EVEX.b is not currently used for broadcast of single element or data rounding modes
  _attributes->set_evex_encoding(evex_encoding);

  // P0: byte 2, initialized to RXBR`00mm
  // instead of not'd
  int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
  byte2 = (~byte2) & 0xF0;
  // confine opc opcode extensions in mm bits to lower two bits
  // of form {0F, 0F_38, 0F_3A}
  byte2 |= opc;
  emit_int8(byte2);

  // P1: byte 3 as Wvvvv1pp
  int byte3 = ((~nds_enc) & 0xf) << 3;
  // p[10] is always 1
  byte3 |= EVEX_F;
  byte3 |= (vex_w & 1) << 7;
  // confine pre opcode extensions in pp bits to lower two bits
  // of form {66, F3, F2}
  byte3 |= pre;
  emit_int8(byte3);

  // P2: byte 4 as zL'Lbv'aaa
  // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
  int byte4 = (_attributes->is_no_reg_mask()) ?
              0 :
              _attributes->get_embedded_opmask_register_specifier();
  // EVEX.v` for extending EVEX.vvvv or VIDX
  byte4 |= (evex_v ? 0: EVEX_V);
  // third EXEC.b for broadcast actions
  byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
  // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
  byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
  // last is EVEX.z for zero/merge actions
  if (_attributes->is_no_reg_mask() == false) {
    byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
  }
  emit_int8(byte4);
}
8061 
// Memory-operand prefix selection: decides between VEX and EVEX encoding
// based on UseAVX, the instruction attributes, and whether upper-bank
// (xmm16-31) registers are involved, then emits the chosen prefix.
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
  bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
  bool vex_b = adr.base_needs_rex();
  bool vex_x = adr.index_needs_rex();
  set_attributes(attributes);
  attributes->set_current_assembler(this);

  // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
  if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
    switch (attributes->get_vector_len()) {
    case AVX_128bit:
    case AVX_256bit:
      attributes->set_is_legacy_mode();
      break;
    }
  }

  // For pure EVEX check and see if this instruction
  // is allowed in legacy mode and has resources which will
  // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
  // else that field is set when we encode to EVEX
  if (UseAVX > 2 && !attributes->is_legacy_mode() &&
      !_is_managed && !attributes->is_evex_instruction()) {
    if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
      bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
      if (check_register_bank) {
        // check nds_enc and xreg_enc for upper bank usage
        if (nds_enc < 16 && xreg_enc < 16) {
          attributes->set_is_legacy_mode();
        }
      } else {
        attributes->set_is_legacy_mode();
      }
    }
  }

  // _is_managed is a one-shot flag; consume it here
  _is_managed = false;
  if (UseAVX > 2 && !attributes->is_legacy_mode())
  {
    bool evex_r = (xreg_enc >= 16);
    bool evex_v = (nds_enc >= 16);
    attributes->set_is_evex_instruction();
    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
  } else {
    // falling back to VEX: a "reverted" W bit must be cleared for the VEX form
    if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
      attributes->set_rex_vex_w(false);
    }
    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
  }
}
8112 
// Register-register prefix selection: same VEX/EVEX decision logic as the
// memory-operand vex_prefix() above, additionally promoting to 512-bit EVEX
// when an upper-bank register forces it. Returns the low-3-bit register
// fields packed for the caller's ModRM byte.
int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
  bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
  bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
  bool vex_x = false;
  set_attributes(attributes);
  attributes->set_current_assembler(this);
  bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);

  // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
  if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
    switch (attributes->get_vector_len()) {
    case AVX_128bit:
    case AVX_256bit:
      if (check_register_bank) {
        if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
          // up propagate arithmetic instructions to meet RA requirements
          attributes->set_vector_len(AVX_512bit);
        } else {
          attributes->set_is_legacy_mode();
        }
      } else {
        attributes->set_is_legacy_mode();
      }
      break;
    }
  }

  // For pure EVEX check and see if this instruction
  // is allowed in legacy mode and has resources which will
  // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
  // else that field is set when we encode to EVEX
  if (UseAVX > 2 && !attributes->is_legacy_mode() &&
      !_is_managed && !attributes->is_evex_instruction()) {
    if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
      if (check_register_bank) {
        // check dst_enc, nds_enc and src_enc for upper bank usage
        if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
          attributes->set_is_legacy_mode();
        }
      } else {
        attributes->set_is_legacy_mode();
      }
    }
  }

  // _is_managed is a one-shot flag; consume it here
  _is_managed = false;
  if (UseAVX > 2 && !attributes->is_legacy_mode())
  {
    bool evex_r = (dst_enc >= 16);
    bool evex_v = (nds_enc >= 16);
    // can use vex_x as bank extender on rm encoding
    vex_x = (src_enc >= 16);
    attributes->set_is_evex_instruction();
    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
  } else {
    // falling back to VEX: a "reverted" W bit must be cleared for the VEX form
    if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
      attributes->set_rex_vex_w(false);
    }
    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
  }

  // return modrm byte components for operands
  return (((dst_enc & 7) << 3) | (src_enc & 7));
}
8177 
8178 
// Emit a SIMD prefix for an xmm/memory form: VEX/EVEX when AVX is enabled,
// otherwise the legacy SSE REX encoding (which permits no distinct nds).
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
                            VexOpcode opc, InstructionAttr *attributes) {
  if (UseAVX > 0) {
    int xreg_enc = xreg->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
  } else {
    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
    rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
  }
}
8190 
// Register-register counterpart of simd_prefix(); returns the packed
// ModRM register fields from the chosen (VEX/EVEX or legacy REX) path.
int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
                                      VexOpcode opc, InstructionAttr *attributes) {
  int dst_enc = dst->encoding();
  int src_enc = src->encoding();
  if (UseAVX > 0) {
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
  } else {
    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
  }
}
8203 
// VCMPPD xmm/ymm, imm8 predicate (VEX.66.0F C2 /r ib); the comparison
// predicate is masked to its 5 valid bits. Legacy-mode (non-EVEX) only.
void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  assert(!VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC2);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)(0x1F & cop));
}
8213 
8214 void Assembler::vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
8215   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
8216   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8217   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8218   emit_int8((unsigned char)0x4B);
8219   emit_int8((unsigned char)(0xC0 | encode));
8220   int src2_enc = src2->encoding();
8221   emit_int8((unsigned char)(0xF0 & src2_enc<<4));
8222 }
8223 
// VPBLENDD (VEX.NDS.66.0F3A 02 /r ib): blend packed dwords under the imm8
// control mask. AVX2-only.
void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
  assert(VM_Version::supports_avx2(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x02);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)imm8);
}
8232 
// VCMPPS (VEX 0F C2 /r ib, no SIMD prefix): packed-single compare with the
// comparison predicate passed through unmasked in the trailing imm8.
void Assembler::vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC2);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)comparison);
}
8241 
// EVEX-encoded VCMPPS producing an opmask result in kdst, optionally
// predicated by `mask`.
void Assembler::evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
                        ComparisonPredicateFP comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // Encoding: EVEX.NDS.XXX.0F.W0 C2 /r ib
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC2);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)comparison);
}
8255 
// EVEX-encoded VCMPPD producing an opmask result in kdst, optionally
// predicated by `mask` (W1 selects the 64-bit element form).
void Assembler::evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
                        ComparisonPredicateFP comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // Encoding: EVEX.NDS.XXX.66.0F.W1 C2 /r ib
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC2);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)comparison);
}
8269 
// SSE4.1 BLENDVPS (66 0F 38 14 /r). Legacy SSE encoding only — the implicit
// xmm0 mask operand makes it incompatible with the AVX register scheme,
// hence the UseAVX <= 0 guard (AVX code must use vblendvps instead).
void Assembler::blendvps(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x14);
  emit_int8((unsigned char)(0xC0 | encode));
}
8278 
// SSE4.1 BLENDVPD (66 0F 38 15 /r). Legacy SSE encoding only; see blendvps
// for why AVX is excluded.
void Assembler::blendvpd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x15);
  emit_int8((unsigned char)(0xC0 | encode));
}
8287 
// SSE4.1 PBLENDVB (66 0F 38 10 /r). Legacy SSE encoding only; see blendvps
// for why AVX is excluded.
void Assembler::pblendvb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x10);
  emit_int8((unsigned char)(0xC0 | encode));
}
8296 
8297 void Assembler::vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) {
8298   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
8299   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8300   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8301   emit_int8((unsigned char)0x4A);
8302   emit_int8((unsigned char)(0xC0 | encode));
8303   int mask_enc = mask->encoding();
8304   emit_int8((unsigned char)(0xF0 & mask_enc<<4));
8305 }
8306 
// VPCMPGTB (VEX.66.0F 64 /r): packed signed byte greater-than compare.
// Limited to the VEX (xmm/ymm destination) forms; the EVEX forms write a
// k register instead and are emitted elsewhere.
void Assembler::vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
  assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x64);
  emit_int8((unsigned char)(0xC0 | encode));
}
8315 
// VPCMPGTW (VEX.66.0F 65 /r): packed signed word greater-than compare.
// VEX forms only; see vpcmpgtb for the EVEX note.
void Assembler::vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
  assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x65);
  emit_int8((unsigned char)(0xC0 | encode));
}
8324 
// VPCMPGTD (VEX.66.0F 66 /r): packed signed dword greater-than compare.
// VEX forms only; see vpcmpgtb for the EVEX note.
void Assembler::vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
  assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0x66);
  emit_int8((unsigned char)(0xC0 | encode));
}
8333 
// VPCMPGTQ (VEX.66.0F38 37 /r): packed signed qword greater-than compare.
// Note the 0F38 opcode map, unlike the b/w/d variants. VEX forms only.
void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
  assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x37);
  emit_int8((unsigned char)(0xC0 | encode));
}
8342 
// EVEX VPCMPD: signed dword compare under predicate `comparison`, writing
// an opmask into kdst, optionally predicated by `mask`.
void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
                        int comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
  // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x1F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)comparison);
}
8357 
// EVEX VPCMPD, memory-source form. EVEX_FV tuple type enables the
// compressed disp8 addressing for full-vector memory operands.
void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
                        int comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
  // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x1F);
  // The k-register number occupies the ModRM reg field, so reuse the
  // general-purpose emit_operand path via as_Register.
  emit_operand(as_Register(dst_enc), src);
  emit_int8((unsigned char)comparison);
}
8375 
// EVEX VPCMPQ: signed qword compare into an opmask (W1 selects 64-bit
// elements), optionally predicated by `mask`.
void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
                        int comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
  // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x1F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)comparison);
}
8390 
// EVEX VPCMPQ, memory-source form; EVEX_FV tuple for compressed disp8.
void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
                        int comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
  // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x1F);
  // k-register number goes in the ModRM reg field; see evpcmpd(Address).
  emit_operand(as_Register(dst_enc), src);
  emit_int8((unsigned char)comparison);
}
8408 
// EVEX VPCMPB: signed byte compare into an opmask. Byte/word opmask forms
// additionally require AVX-512BW.
void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
                        int comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(VM_Version::supports_avx512bw(), "");
  assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
  // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x3F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)comparison);
}
8424 
// EVEX VPCMPB, memory-source form; EVEX_FVM tuple (full vector memory, no
// broadcast) for byte elements. Requires AVX-512BW.
void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
                        int comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(VM_Version::supports_avx512bw(), "");
  assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
  // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x3F);
  // k-register number goes in the ModRM reg field; see evpcmpd(Address).
  emit_operand(as_Register(dst_enc), src);
  emit_int8((unsigned char)comparison);
}
8443 
// EVEX VPCMPW: signed word compare into an opmask (W1 selects the word
// form). Requires AVX-512BW.
void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
                        int comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(VM_Version::supports_avx512bw(), "");
  assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
  // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x3F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)comparison);
}
8459 
// EVEX VPCMPW, memory-source form; EVEX_FVM tuple. Requires AVX-512BW.
void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
                        int comparison, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(VM_Version::supports_avx512bw(), "");
  assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
  // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.reset_is_clear_context();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x3F);
  // k-register number goes in the ModRM reg field; see evpcmpd(Address).
  emit_operand(as_Register(dst_enc), src);
  emit_int8((unsigned char)comparison);
}
8478 
// VPBLENDVB (VEX.NDS.66.0F3A 4C /r /is4): variable byte blend. The blend
// control register `mask` rides in the high nibble of the trailing imm8.
void Assembler::vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x4C);
  emit_int8((unsigned char)(0xC0 | encode));
  int mask_enc = mask->encoding();
  emit_int8((unsigned char)(0xF0 & mask_enc << 4));
}
8488 
// EVEX VBLENDMPD: opmask-controlled blend of packed doubles. With
// merge=true, masked-off lanes keep dst's old value (merge-masking);
// otherwise the clear-context (zero-masking) behavior applies.
void Assembler::evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // Encoding: EVEX.NDS.XXX.66.0F38.W1 65 /r
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x65);
  emit_int8((unsigned char)(0xC0 | encode));
}
8502 
// EVEX VBLENDMPS: opmask-controlled blend of packed singles; merge flag
// selects merge- vs zero-masking, as in evblendmpd.
void Assembler::evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  // Encoding: EVEX.NDS.XXX.66.0F38.W0 65 /r
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0x65);
  emit_int8((unsigned char)(0xC0 | encode));
}
8516 
8517 void Assembler::evpblendmb (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8518   assert(VM_Version::supports_evex(), "");
8519   assert(VM_Version::supports_avx512bw(), "");
8520   // Encoding: EVEX.NDS.512.66.0F38.W0 66 /r
8521   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8522   attributes.set_is_evex_instruction();
8523   attributes.set_embedded_opmask_register_specifier(mask);
8524   if (merge) {
8525     attributes.reset_is_clear_context();
8526   }
8527   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);       
8528   emit_int8((unsigned char)0x66);
8529   emit_int8((unsigned char)(0xC0 | encode));
8530 }
8531 
8532 void Assembler::evpblendmw (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8533   assert(VM_Version::supports_evex(), "");
8534   assert(VM_Version::supports_avx512bw(), "");
8535   // Encoding: EVEX.NDS.512.66.0F38.W1 66 /r
8536   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8537   attributes.set_is_evex_instruction();
8538   attributes.set_embedded_opmask_register_specifier(mask);
8539   if (merge) {
8540     attributes.reset_is_clear_context();
8541   }
8542   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);       
8543   emit_int8((unsigned char)0x66);
8544   emit_int8((unsigned char)(0xC0 | encode));    
8545 }
8546 
8547 void Assembler::evpblendmd (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8548   assert(VM_Version::supports_evex(), "");
8549   //Encoding: EVEX.NDS.512.66.0F38.W0 64 /r
8550   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8551   attributes.set_is_evex_instruction();
8552   attributes.set_embedded_opmask_register_specifier(mask);
8553   if (merge) {
8554     attributes.reset_is_clear_context();
8555   }
8556   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);       
8557   emit_int8((unsigned char)0x64);
8558   emit_int8((unsigned char)(0xC0 | encode));
8559 }
8560 
8561 void Assembler::evpblendmq (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8562   assert(VM_Version::supports_evex(), "");      
8563   //Encoding: EVEX.NDS.512.66.0F38.W1 64 /r
8564   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8565   attributes.set_is_evex_instruction();
8566   attributes.set_embedded_opmask_register_specifier(mask);
8567   if (merge) {
8568     attributes.reset_is_clear_context();
8569   }     
8570   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);       
8571   emit_int8((unsigned char)0x64);
8572   emit_int8((unsigned char)(0xC0 | encode));    
8573 }
8574 
// SHLX (BMI2, VEX.66.0F38 F7 /r), 32-bit form: dst = src1 << src2 without
// touching flags. Note the shift count (src2) goes in the VEX.vvvv slot.
void Assembler::shlxl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xC0 | encode));
}
8582 
// SHLX (BMI2), 64-bit form (VEX.W1); see shlxl for the operand placement.
void Assembler::shlxq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xC0 | encode));
}
8590 
8591 #ifndef _LP64
8592 
// 32-bit only: single-byte INC r32 (0x40+r); this opcode range is the REX
// prefix space on x86-64, hence the #ifndef _LP64 guard.
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_int8(0x40 | dst->encoding());
}
8597 
// 32-bit only: lea is simply the 32-bit leal.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}
8601 
// 32-bit only: MOV m32, imm32 (C7 /0) where the immediate carries
// relocation info (emitted via emit_data with the given rspec).
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xC7);
  // /0 opcode extension: rax selects reg-field 0 in the ModRM byte.
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}
8608 
// 32-bit only: MOV r32, imm32 (B8+r) with a relocated immediate.
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_data((int)imm32, rspec, 0);
}
8615 
// 32-bit only: POPA (0x61) — pop all general registers.
void Assembler::popa() { // 32bit
  emit_int8(0x61);
}
8619 
// 32-bit only: PUSH imm32 (0x68) with a relocated immediate.
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8(0x68);
  emit_data(imm32, rspec, 0);
}
8625 
// 32-bit only: PUSHA (0x60) — push all general registers.
void Assembler::pusha() { // 32bit
  emit_int8(0x60);
}
8629 
// 32-bit only: SETNZ r8 (0F 95). 0xE0 | enc forms the ModRM byte with
// mod=11 and reg-field /4; no REX handling is needed on 32-bit.
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_int8(0x0F);
  emit_int8((unsigned char)0x95);
  emit_int8((unsigned char)(0xE0 | dst->encoding()));
}
8635 
// 32-bit only: SHLD r32, r32, CL (0F A5 /r) — double-precision shift left
// by the count in CL.
void Assembler::shldl(Register dst, Register src) {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA5);
  // ModRM: mod=11, reg=src, rm=dst.
  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
}
8641 
8642 // 0F A4 / r ib
// 32-bit only: SHLD r32, r32, imm8 (0F A4 /r ib) — double-precision shift
// left by an immediate count.
void Assembler::shldl(Register dst, Register src, int8_t imm8) {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA4);
  // ModRM: mod=11, reg=src, rm=dst.
  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
  emit_int8(imm8);
}
8649 
// 32-bit only: SHRD r32, r32, CL (0F AD /r) — double-precision shift right
// by the count in CL.
void Assembler::shrdl(Register dst, Register src) {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAD);
  // ModRM: mod=11, reg=src, rm=dst.
  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
}
8655 
8656 #else // LP64
8657 
// 64-bit: SETNZ r8 (0F 95). Unlike the 32-bit version, a REX prefix may be
// needed (byteinst=true) so spl/bpl/sil/dil and r8b-r15b encode correctly.
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0x95);
  emit_int8((unsigned char)(0xE0 | enc));
}
8664 
8665 // 64bit only pieces of the assembler
8666 // This should only be used by 64bit instructions that can use rip-relative
8667 // it cannot be used by instructions that want an immediate value.
8668 
// Decide whether `adr` can be addressed rip-relatively (with a signed
// 32-bit displacement) from anywhere code might end up in the code cache.
// The answer depends primarily on the literal's relocation type; for
// external/runtime-call targets it falls back to a worst-case distance
// check against both ends of the code cache.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
8734 
8735 // Check if the polling page is not reachable from the code cache using rip-relative
8736 // addressing.
// True when the safepoint polling page cannot be reached rip-relatively
// from every point in the code cache (or when ForceUnreachable stresses
// the far-poll path).
bool Assembler::is_polling_page_far() {
  intptr_t addr = (intptr_t)os::get_polling_page();
  return ForceUnreachable ||
         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}
8743 
// Emit a 64-bit datum; plain emit when no relocation is needed, otherwise
// delegate to the RelocationHolder overload with a simple spec.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_int64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}
8753 
// Emit a relocated 64-bit datum. Only the immediate-operand format is
// supported here, and the caller must be inside an InstructionMark so the
// relocation can anchor to the enclosing instruction.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words.  Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_int64(data);
}
8768 
8769 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
8770   if (reg_enc >= 8) {
8771     prefix(REX_B);
8772     reg_enc -= 8;
8773   } else if (byteinst && reg_enc >= 4) {
8774     prefix(REX);
8775   }
8776   return reg_enc;
8777 }
8778 
8779 int Assembler::prefixq_and_encode(int reg_enc) {
8780   if (reg_enc < 8) {
8781     prefix(REX_W);
8782   } else {
8783     prefix(REX_WB);
8784     reg_enc -= 8;
8785   }
8786   return reg_enc;
8787 }
8788 
// Emit the REX prefix (if any) for a two-register operand pair and return
// the combined ModRM reg/rm bits (dst in reg field, src in rm field).
// REX.R extends dst, REX.B extends src; a bare REX is required for byte
// registers with encodings 4-7 (spl/bpl/sil/dil).
int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
8808 
// Emit the REX prefix for a two-register 64-bit operand pair and return the
// combined ModRM reg/rm bits. REX.W is always set; REX.R/REX.B are added
// for extended dst/src respectively.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
8828 
// Emit REX.B when the register is one of the extended set (r8-r15).
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}
8834 
// Accumulate REX.B (extended src) and REX.R (extended dst) onto the caller-
// supplied base prefix and emit it, unless the result is still empty.
void Assembler::prefix(Register dst, Register src, Prefix p) {
  if (src->encoding() >= 8) {
    p = (Prefix)(p | REX_B);
  }
  if (dst->encoding() >= 8) {
    p = (Prefix)( p | REX_R);
  }
  if (p != Prefix_EMPTY) {
    // do not generate an empty prefix
    prefix(p);
  }
}
8847 
// Emits a REX prefix for an instruction with a register destination and a
// memory operand, starting from the caller-supplied prefix bits. An address
// whose index register needs extension (REX.X) is not supported here.
// NOTE(review): when the base needs REX.B *and* dst needs REX.R, this emits
// two separate prefix bytes (REX_B, then p|REX_R) instead of folding B into
// p — confirm callers never pass such an operand combination.
void Assembler::prefix(Register dst, Address adr, Prefix p) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
    }
  }
  if (dst->encoding() >= 8) {
    p = (Prefix)(p | REX_R);
  }
  if (p != Prefix_EMPTY) {
    // do not generate an empty prefix
    prefix(p);
  }
}
8868 
8869 void Assembler::prefix(Address adr) {
8870   if (adr.base_needs_rex()) {
8871     if (adr.index_needs_rex()) {
8872       prefix(REX_XB);
8873     } else {
8874       prefix(REX_B);
8875     }
8876   } else {
8877     if (adr.index_needs_rex()) {
8878       prefix(REX_X);
8879     }
8880   }
8881 }
8882 
8883 void Assembler::prefixq(Address adr) {
8884   if (adr.base_needs_rex()) {
8885     if (adr.index_needs_rex()) {
8886       prefix(REX_WXB);
8887     } else {
8888       prefix(REX_WB);
8889     }
8890   } else {
8891     if (adr.index_needs_rex()) {
8892       prefix(REX_WX);
8893     } else {
8894       prefix(REX_W);
8895     }
8896   }
8897 }
8898 
8899 
8900 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
8901   if (reg->encoding() < 8) {
8902     if (adr.base_needs_rex()) {
8903       if (adr.index_needs_rex()) {
8904         prefix(REX_XB);
8905       } else {
8906         prefix(REX_B);
8907       }
8908     } else {
8909       if (adr.index_needs_rex()) {
8910         prefix(REX_X);
8911       } else if (byteinst && reg->encoding() >= 4 ) {
8912         prefix(REX);
8913       }
8914     }
8915   } else {
8916     if (adr.base_needs_rex()) {
8917       if (adr.index_needs_rex()) {
8918         prefix(REX_RXB);
8919       } else {
8920         prefix(REX_RB);
8921       }
8922     } else {
8923       if (adr.index_needs_rex()) {
8924         prefix(REX_RX);
8925       } else {
8926         prefix(REX_R);
8927       }
8928     }
8929   }
8930 }
8931 
8932 void Assembler::prefixq(Address adr, Register src) {
8933   if (src->encoding() < 8) {
8934     if (adr.base_needs_rex()) {
8935       if (adr.index_needs_rex()) {
8936         prefix(REX_WXB);
8937       } else {
8938         prefix(REX_WB);
8939       }
8940     } else {
8941       if (adr.index_needs_rex()) {
8942         prefix(REX_WX);
8943       } else {
8944         prefix(REX_W);
8945       }
8946     }
8947   } else {
8948     if (adr.base_needs_rex()) {
8949       if (adr.index_needs_rex()) {
8950         prefix(REX_WRXB);
8951       } else {
8952         prefix(REX_WRB);
8953       }
8954     } else {
8955       if (adr.index_needs_rex()) {
8956         prefix(REX_WRX);
8957       } else {
8958         prefix(REX_WR);
8959       }
8960     }
8961   }
8962 }
8963 
8964 void Assembler::prefix(Address adr, XMMRegister reg) {
8965   if (reg->encoding() < 8) {
8966     if (adr.base_needs_rex()) {
8967       if (adr.index_needs_rex()) {
8968         prefix(REX_XB);
8969       } else {
8970         prefix(REX_B);
8971       }
8972     } else {
8973       if (adr.index_needs_rex()) {
8974         prefix(REX_X);
8975       }
8976     }
8977   } else {
8978     if (adr.base_needs_rex()) {
8979       if (adr.index_needs_rex()) {
8980         prefix(REX_RXB);
8981       } else {
8982         prefix(REX_RB);
8983       }
8984     } else {
8985       if (adr.index_needs_rex()) {
8986         prefix(REX_RX);
8987       } else {
8988         prefix(REX_R);
8989       }
8990     }
8991   }
8992 }
8993 
8994 void Assembler::prefixq(Address adr, XMMRegister src) {
8995   if (src->encoding() < 8) {
8996     if (adr.base_needs_rex()) {
8997       if (adr.index_needs_rex()) {
8998         prefix(REX_WXB);
8999       } else {
9000         prefix(REX_WB);
9001       }
9002     } else {
9003       if (adr.index_needs_rex()) {
9004         prefix(REX_WX);
9005       } else {
9006         prefix(REX_W);
9007       }
9008     }
9009   } else {
9010     if (adr.base_needs_rex()) {
9011       if (adr.index_needs_rex()) {
9012         prefix(REX_WRXB);
9013       } else {
9014         prefix(REX_WRB);
9015       }
9016     } else {
9017       if (adr.index_needs_rex()) {
9018         prefix(REX_WRX);
9019       } else {
9020         prefix(REX_WR);
9021       }
9022     }
9023   }
9024 }
9025 
// 64-bit add-with-carry of an immediate: ADC r64, imm32 (0x81 /2).
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}
9030 
// 64-bit add-with-carry from memory: ADC r64, r/m64 (0x13).
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}
9037 
// 64-bit register-register add-with-carry: ADC r64, r64 (0x13 /r).
void Assembler::adcq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
9042 
// 64-bit add of an immediate to memory: ADD r/m64, imm (0x81 /0; rax selects
// the /0 extension in the ModRM reg field).
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);
}
9048 
// 64-bit add of a register into memory: ADD r/m64, r64 (0x01).
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}
9055 
// 64-bit add of an immediate to a register: ADD r64, imm (0x81 /0).
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}
9060 
// 64-bit add from memory: ADD r64, r/m64 (0x03).
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}
9067 
// 64-bit register-register add: ADD r64, r64 (0x03 /r).
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
9072 
// 64-bit unsigned add with CF only (ADX extension): ADCX r64, r64
// (66 REX.W 0F 38 F6 /r). The 0x66 prefix must precede the REX prefix.
void Assembler::adcxq(Register dst, Register src) {
  //assert(VM_Version::supports_adx(), "adx instructions not supported");
  emit_int8((unsigned char)0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x38);
  emit_int8((unsigned char)0xF6);
  emit_int8((unsigned char)(0xC0 | encode));
}
9082 
// 64-bit unsigned add with OF only (ADX extension): ADOX r64, r64
// (F3 REX.W 0F 38 F6 /r). The 0xF3 prefix must precede the REX prefix.
void Assembler::adoxq(Register dst, Register src) {
  //assert(VM_Version::supports_adx(), "adx instructions not supported");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x38);
  emit_int8((unsigned char)0xF6);
  emit_int8((unsigned char)(0xC0 | encode));
}
9092 
// 64-bit AND of an immediate into memory: AND r/m64, imm32 (0x81 /4; rsp
// selects the /4 extension in the ModRM reg field).
void Assembler::andq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}
9100 
// 64-bit AND of an immediate into a register: AND r64, imm32 (0x81 /4).
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}
9105 
// 64-bit AND from memory: AND r64, r/m64 (0x23).
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}
9112 
// 64-bit register-register AND: AND r64, r64 (0x23 /r).
void Assembler::andq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
9117 
// BMI1 ANDN: dst = ~src1 & src2 (VEX.LZ.0F38.W1 F2 /r), register form.
void Assembler::andnq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}
9125 
// BMI1 ANDN with a memory source: dst = ~src1 & [src2] (VEX.LZ.0F38.W1 F2).
void Assembler::andnq(Register dst, Register src1, Address src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}
9134 
// 64-bit bit-scan-forward (lowest set bit index): BSF r64, r64 (0F BC /r).
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}
9141 
// 64-bit bit-scan-reverse (highest set bit index): BSR r64, r64 (0F BD /r).
void Assembler::bsrq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}
9148 
// 64-bit byte swap: BSWAP r64 (0F C8+rd, register encoded in the opcode).
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}
9154 
// BMI1 BLSI (isolate lowest set bit): VEX.0F38.W1 F3 /3; rbx (enc 3) selects
// the /3 opcode extension in the reg field.
void Assembler::blsiq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
9162 
// BMI1 BLSI with a memory source (VEX.0F38.W1 F3 /3).
void Assembler::blsiq(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}
9171 
// BMI1 BLSMSK (mask up to lowest set bit): VEX.0F38.W1 F3 /2; rdx (enc 2)
// selects the /2 opcode extension.
void Assembler::blsmskq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
9179 
// BMI1 BLSMSK with a memory source (VEX.0F38.W1 F3 /2).
void Assembler::blsmskq(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}
9188 
// BMI1 BLSR (reset lowest set bit): VEX.0F38.W1 F3 /1; rcx (enc 1) selects
// the /1 opcode extension.
void Assembler::blsrq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
9196 
// BMI1 BLSR with a memory source (VEX.0F38.W1 F3 /1).
void Assembler::blsrq(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}
9205 
// Sign-extend RAX into RDX:RAX: CQO (REX.W 99).
void Assembler::cdqq() {
  prefix(REX_W);
  emit_int8((unsigned char)0x99);
}
9210 
// Flush the cache line containing adr: CLFLUSH m8 (0F AE /7; rdi (enc 7)
// selects the /7 opcode extension).
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(rdi, adr);
}
9217 
// 64-bit conditional move, register form: CMOVcc r64, r64 (0F 40+cc /r).
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}
9224 
// 64-bit conditional move from memory: CMOVcc r64, r/m64 (0F 40+cc).
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}
9232 
// 64-bit compare of memory with an immediate: CMP r/m64, imm32 (0x81 /7;
// rdi (enc 7) selects the /7 extension).
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}
9240 
// 64-bit compare of a register with an immediate: CMP r64, imm32 (0x81 /7).
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
9245 
9246 void Assembler::cmpq(Address dst, Register src) {
9247   InstructionMark im(this);
9248   prefixq(dst, src);
9249   emit_int8(0x3B);
9250   emit_operand(src, dst);
9251 }
9252 
// 64-bit register-register compare: CMP r64, r64 (0x3B /r).
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
9257 
// 64-bit compare of a register with memory: CMP r64, r/m64 (0x3B).
void Assembler::cmpq(Register dst, Address  src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x3B);
  emit_operand(dst, src);
}
9264 
// 64-bit compare-and-exchange with memory: CMPXCHG r/m64, r64 (0F B1).
// Callers add the LOCK prefix separately when atomicity is required.
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}
9272 
// Convert a signed 64-bit integer register to scalar double:
// CVTSI2SD xmm, r64 (F2 REX.W 0F 2A /r).
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}
9280 
// Convert a signed 64-bit integer in memory to scalar double:
// CVTSI2SD xmm, m64 (F2 REX.W 0F 2A).
void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_operand(dst, src);
}
9290 
// Convert a signed 64-bit integer in memory to scalar single:
// CVTSI2SS xmm, m64 (F3 REX.W 0F 2A).
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_operand(dst, src);
}
9300 
// Truncating convert scalar double to signed 64-bit integer:
// CVTTSD2SI r64, xmm (F2 REX.W 0F 2C /r).
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}
9308 
// Truncating convert scalar single to signed 64-bit integer:
// CVTTSS2SI r64, xmm (F3 REX.W 0F 2C /r).
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}
9316 
// 32-bit register decrement: DEC r32 via the two-byte form FF /1.
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
}
9324 
9325 void Assembler::decq(Register dst) {
9326   // Don't use it directly. Use MacroAssembler::decrementq() instead.
9327   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
9328   int encode = prefixq_and_encode(dst->encoding());
9329   emit_int8((unsigned char)0xFF);
9330   emit_int8(0xC8 | encode);
9331 }
9332 
// 64-bit memory decrement: DEC r/m64 (FF /1; rcx (enc 1) selects /1).
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rcx, dst);
}
9340 
// Restore x87/SSE state: FXRSTOR64 m512byte (REX.W 0F AE /1).
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(1), src);
}
9347 
// Restore extended processor state: XRSTOR64 mem (REX.W 0F AE /5).
void Assembler::xrstor(Address src) {
  prefixq(src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(5), src);
}
9354 
// Save x87/SSE state: FXSAVE64 m512byte (REX.W 0F AE /0).
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(0), dst);
}
9361 
// Save extended processor state: XSAVE64 mem (REX.W 0F AE /4).
void Assembler::xsave(Address dst) {
  prefixq(dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(4), dst);
}
9368 
// Signed 64-bit divide of RDX:RAX by src: IDIV r64 (F7 /7).
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF8 | encode));
}
9374 
// 64-bit two-operand signed multiply: IMUL r64, r64 (0F AF /r).
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
  emit_int8((unsigned char)(0xC0 | encode));
}
9381 
// 64-bit signed multiply by immediate: dst = src * value. Uses the short
// sign-extended imm8 form (0x6B) when the value fits, else imm32 (0x69).
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_int8(0x6B);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int8(value & 0xFF);
  } else {
    emit_int8(0x69);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int32(value);
  }
}
9394 
// 64-bit signed multiply by a memory operand: IMUL r64, r/m64 (0F AF).
void Assembler::imulq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char) 0xAF);
  emit_operand(dst, src);
}
9402 
// 32-bit register increment: INC r32 via the two-byte form FF /0.
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC0 | encode));
}
9410 
// 64-bit register increment: INC r64 via the two-byte form FF /0.
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC0 | encode));
}
9418 
// 64-bit memory increment: INC r/m64 (FF /0; rax (enc 0) selects /0).
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rax, dst);
}
9426 
// Pointer-sized LEA; on 64-bit this is simply the 64-bit form.
void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}
9430 
// 64-bit load effective address: LEA r64, m (8D /r).
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x8D);
  emit_operand(dst, src);
}
9437 
// Load a full 64-bit immediate: MOV r64, imm64 (REX.W B8+rd io).
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_int64(imm64);
}
9444 
9445 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
9446   InstructionMark im(this);
9447   int encode = prefixq_and_encode(dst->encoding());
9448   emit_int8(0xB8 | encode);
9449   emit_data64(imm64, rspec);
9450 }
9451 
// Load a compressed (narrow) oop immediate into a register: MOV r32, imm32
// (B8+rd), recorded with a narrow-oop relocation so the GC can patch it.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
9458 
// Store a compressed (narrow) oop immediate to memory: MOV r/m32, imm32
// (C7 /0), recorded with a narrow-oop relocation.
void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
9466 
// Compare a register with a compressed (narrow) oop immediate:
// CMP r32, imm32 (81 /7), recorded with a narrow-oop relocation.
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_int8((unsigned char)0x81);
  emit_int8((unsigned char)(0xF8 | encode));
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
9474 
// Compare memory with a compressed (narrow) oop immediate:
// CMP r/m32, imm32 (81 /7), recorded with a narrow-oop relocation.
void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, src1, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
9482 
// 64-bit leading-zero count: LZCNT r64, r64 (F3 REX.W 0F BD /r). Without
// LZCNT support the CPU executes this encoding as BSR, hence the assert.
void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}
9491 
// Move a 64-bit GPR into an XMM register: MOVQ xmm, r64 (66 REX.W 0F 6E /r).
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6E);
  emit_int8((unsigned char)(0xC0 | encode));
}
9500 
// Move an XMM register into a 64-bit GPR: MOVQ r64, xmm (66 REX.W 0F 7E /r).
void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_int8((unsigned char)(0xC0 | encode));
}
9510 
// 64-bit register-register move: MOV r64, r64 (8B /r).
void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x8B);
  emit_int8((unsigned char)(0xC0 | encode));
}
9516 
// 64-bit load from memory: MOV r64, r/m64 (8B).
void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}
9523 
// 64-bit store to memory: MOV r/m64, r64 (89).
void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}
9530 
// Sign-extending byte load: MOVSX r64, m8 (0F BE).
void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_operand(dst, src);
}
9538 
// Sign-extending byte move between registers: MOVSX r64, r8 (0F BE /r).
void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_int8((unsigned char)(0xC0 | encode));
}
9545 
// Intended: load a sign-extended 32-bit immediate into a 64-bit register
// (REX.W C7 /0). Deliberately disabled — the emitted encoding was observed
// to be wrong (see comments below), so it traps at code-generation time.
void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xC7 | encode));
  emit_int32(imm32);
}
9556 
// Store a sign-extended 32-bit immediate as a quadword:
// MOV r/m64, imm32 (REX.W C7 /0).
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4);
  emit_int32(imm32);
}
9565 
// Sign-extending doubleword load: MOVSXD r64, m32 (REX.W 63).
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x63);
  emit_operand(dst, src);
}
9572 
// Sign-extending doubleword move: MOVSXD r64, r32 (REX.W 63 /r).
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x63);
  emit_int8((unsigned char)(0xC0 | encode));
}
9578 
// Sign-extending word load: MOVSX r64, m16 (0F BF).
void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_operand(dst, src);
}
9586 
// Sign-extending word move between registers: MOVSX r64, r16 (0F BF /r).
void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xBF);
  emit_int8((unsigned char)(0xC0 | encode));
}
9593 
// Zero-extending byte load: MOVZX r64, m8 (0F B6).
void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB6);
  emit_operand(dst, src);
}
9601 
9602 void Assembler::movzbq(Register dst, Register src) {
9603   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9604   emit_int8(0x0F);
9605   emit_int8((unsigned char)0xB6);
9606   emit_int8(0xC0 | encode);
9607 }
9608 
// Zero-extending word load: MOVZX r64, m16 (0F B7).
void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB7);
  emit_operand(dst, src);
}
9616 
// Zero-extending word move between registers: MOVZX r64, r16 (0F B7 /r).
void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB7);
  emit_int8((unsigned char)(0xC0 | encode));
}
9623 
// Unsigned 64-bit multiply RDX:RAX = RAX * [src]: MUL r/m64 (F7 /4;
// rsp (enc 4) selects the /4 extension).
void Assembler::mulq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_int8((unsigned char)0xF7);
  emit_operand(rsp, src);
}
9630 
// Unsigned 64-bit multiply RDX:RAX = RAX * src: MUL r64 (F7 /4).
void Assembler::mulq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xE0 | encode));
}
9636 
// BMI2 flag-less unsigned multiply: MULX dst1:dst2 = RDX * src
// (VEX.LZ.F2.0F38.W1 F6 /r).
void Assembler::mulxq(Register dst1, Register dst2, Register src) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF6);
  emit_int8((unsigned char)(0xC0 | encode));
}
9644 
// 64-bit two's-complement negate: NEG r64 (F7 /3).
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD8 | encode));
}
9650 
// 64-bit one's-complement negate: NOT r64 (F7 /2).
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD0 | encode));
}
9656 
// 64-bit OR of an immediate into memory: OR r/m64, imm32 (0x81 /1;
// rcx (enc 1) selects the /1 extension).
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rcx, dst, 4);
  emit_int32(imm32);
}
9664 
// 64-bit OR of an immediate into a register: OR r64, imm32 (0x81 /1).
void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}
9669 
// 64-bit OR from memory: OR r64, r/m64 (0x0B).
void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0B);
  emit_operand(dst, src);
}
9676 
// 64-bit register-register OR: OR r64, r64 (0x0B /r).
void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
9681 
// Restore all general-purpose registers (except rsp) from the 16-slot frame
// laid down by pusha(), then pop the frame. Slot order mirrors pusha().
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}
9702 
// 64-bit population count from memory: POPCNT r64, m64 (F3 REX.W 0F B8).
// The 0xF3 mandatory prefix must be emitted before the REX prefix.
void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_int8((unsigned char)0xF3);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src);
}
9712 
// 64-bit population count, register form: POPCNT r64, r64 (F3 REX.W 0F B8 /r).
void Assembler::popcntq(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}
9721 
// Pop the stack top into memory: POP r/m64 (8F /0; rax (enc 0) selects /0).
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x8F);
  emit_operand(rax, dst);
}
9728 
// Save all general-purpose registers into a 16-slot frame below rsp.
// The original rsp is stashed first (before the frame is allocated) so its
// pre-push value is preserved; popa() restores in the mirrored order.
void Assembler::pusha() { // 64bit
  // we have to store original rsp.  ABI says that 128 bytes
  // below rsp are local scratch.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}
9753 
// PUSH m64 -- push a memory operand onto the stack.
// Encoding: FF /6; rsi (encoding 6) supplies the /6 reg-field digit,
// it is not an operand of the instruction.
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsi, src);
}
9760 
// RCL r64, imm8 -- rotate through carry, left.
// Uses the one-byte-shorter D1 /2 form when the count is 1,
// otherwise C1 /2 ib.  ModRM reg-field /2 gives the 0xD0 base.
void Assembler::rclq(Register dst, int imm8) {
  // isShiftCount presumably validates a 32-bit (0..31) count; halving imm8
  // widens the accepted range to the 64-bit 0..63 counts -- TODO confirm.
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD0 | encode));
    emit_int8(imm8);
  }
}
9773 
// RCR r64, imm8 -- rotate through carry, right.
// D1 /3 for a count of 1, else C1 /3 ib; /3 gives the 0xD8 base.
void Assembler::rcrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count"); // 64-bit count range
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD8 | encode));
    emit_int8(imm8);
  }
}
9786 
9787 void Assembler::rorq(Register dst, int imm8) {
9788   assert(isShiftCount(imm8 >> 1), "illegal shift count");
9789   int encode = prefixq_and_encode(dst->encoding());
9790   if (imm8 == 1) {
9791     emit_int8((unsigned char)0xD1);
9792     emit_int8((unsigned char)(0xC8 | encode));
9793   } else {
9794     emit_int8((unsigned char)0xC1);
9795     emit_int8((unsigned char)(0xc8 | encode));
9796     emit_int8(imm8);
9797   }
9798 }
9799 
// RORX r64, r64, imm8 (BMI2) -- rotate right without touching flags.
// VEX.LZ.F2.0F3A.W1 F0 /r ib; vex_w=true selects the 64-bit form.
void Assembler::rorxq(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
9808 
// RORX r32, r32, imm8 (BMI2) -- 32-bit variant of rorxq above;
// identical encoding except vex_w=false selects the 32-bit form.
void Assembler::rorxd(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
9817 
// SAR r64, imm8 -- arithmetic (sign-propagating) right shift.
// D1 /7 for a count of 1, else C1 /7 ib; /7 gives the 0xF8 base.
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count"); // 64-bit count range
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xF8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xF8 | encode));
    emit_int8(imm8);
  }
}
9830 
// SAR r64, cl -- arithmetic right shift by the count in CL.
// Encoding: D3 /7.
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode));
}
9836 
// SBB m64, imm32 -- subtract with borrow, immediate to memory.
// 81 /3 (or the 8-bit-immediate 83 form chosen by emit_arith_operand);
// rbx (encoding 3) supplies the /3 digit, it is not an operand.
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}
9842 
// SBB r64, imm32 -- subtract with borrow, immediate to register.
// prefixq_and_encode is called only for its REX side effect; emit_arith
// re-derives the register encoding itself, hence the discarded result.
void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);   // 0xD8 = ModRM base for /3
}
9847 
// SBB r64, m64 -- subtract memory operand (with borrow) from register.
// Encoding: 1B /r.
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}
9854 
// SBB r64, r64 -- register-register subtract with borrow (1B /r).
void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding()); // REX only
  emit_arith(0x1B, 0xC0, dst, src);
}
9859 
// SHL r64, imm8 -- logical left shift.
// D1 /4 for a count of 1, else C1 /4 ib; /4 gives the 0xE0 base.
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count"); // 64-bit count range
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}
9872 
// SHL r64, cl -- logical left shift by the count in CL (D3 /4).
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}
9878 
9879 void Assembler::shrq(Register dst, int imm8) {
9880   assert(isShiftCount(imm8 >> 1), "illegal shift count");
9881   int encode = prefixq_and_encode(dst->encoding());
9882   emit_int8((unsigned char)0xC1);
9883   emit_int8((unsigned char)(0xE8 | encode));
9884   emit_int8(imm8);
9885 }
9886 
9887 void Assembler::shrq(Register dst) {
9888   int encode = prefixq_and_encode(dst->encoding());
9889   emit_int8((unsigned char)0xD3);
9890   emit_int8(0xE8 | encode);
9891 }
9892 
// SUB m64, imm32 -- 81 /5 (or the short 83 form chosen by
// emit_arith_operand); rbp (encoding 5) supplies the /5 digit.
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}
9898 
// SUB m64, r64 -- subtract register from memory (29 /r).
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}
9905 
// SUB r64, imm32 -- emit_arith may pick the sign-extended 8-bit form.
void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());  // REX only; result unused
  emit_arith(0x81, 0xE8, dst, imm32);          // 0xE8 = ModRM base for /5
}
9910 
// Force generation of a 4 byte immediate value even if it fits into 8bit
// (callers that patch the immediate later need the fixed-size encoding).
void Assembler::subq_imm32(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());  // REX only; result unused
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}
9916 
// SUB r64, m64 -- subtract memory operand from register (2B /r).
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}
9923 
// SUB r64, r64 -- register-register subtract (2B /r).
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding()); // REX only
  emit_arith(0x2B, 0xC0, dst, src);
}
9928 
// TEST r64, imm32 -- AND without writing the destination, sets flags only.
// Uses the shorter rax-specific A9 id form when dst is rax (encoding 0),
// otherwise F7 /0 id.  The immediate is always emitted as a full 4 bytes.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    prefix(REX_W);                          // rax short form still needs REX.W
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_int32(imm32);
}
9944 
// TEST r64, r64 -- flags-only AND of two registers (85 /r).
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding()); // REX only
  emit_arith(0x85, 0xC0, dst, src);
}
9949 
// XADD m64, r64 -- exchange-and-add (0F C1 /r).  Note: no lock prefix is
// emitted here; callers wanting atomicity must emit LOCK themselves.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}
9957 
// XCHG r64, m64 -- exchange register with memory (87 /r).
// XCHG with a memory operand is implicitly locked by the processor.
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}
9964 
9965 void Assembler::xchgq(Register dst, Register src) {
9966   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9967   emit_int8((unsigned char)0x87);
9968   emit_int8((unsigned char)(0xc0 | encode));
9969 }
9970 
// XOR r64, r64 -- register-register exclusive or (33 /r).
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding()); // REX only
  emit_arith(0x33, 0xC0, dst, src);
}
9975 
// XOR r64, m64 -- exclusive or of a memory operand into a register (33 /r).
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}
9982 
9983 #endif // !LP64