/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX-enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
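
// Worked example (illustrative only): an EVEX full-vector (EVEX_FV(0))
// instruction at 512-bit vector length has a disp factor of 64 per the
// table above, so a displacement of 128 can be emitted as the compressed
// disp8 value 128 / 64 = 2, while a displacement of 100 is not a multiple
// of 64 and must fall back to the full disp32 form.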

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
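
// For example, encode(rcx) == 1, and on 64-bit encode(r9) is also 1: the
// high bit of an extended register's encoding is not part of the ModRM
// byte; it travels in the REX/VEX prefix (REX.B, REX.R, etc.) instead.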

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}
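
// Example byte sequences (a sketch of the two forms): "addl $5, (rcx)"
// fits the sign-extended imm8 form and encodes as 83 01 05, while
// "addl $0x12345678, (rcx)" needs the full imm32 form 81 01 78 56 34 12.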


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test whether the displacement fits the compressed format and, if so,
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (-0x80 <= new_disp && new_disp < 0x80) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}


bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test whether the displacement fits the compressed format and, if so,
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
    int evex_encoding = _attributes->get_evex_encoding();
    int tuple_type = _attributes->get_tuple_type();
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_attributes->get_input_size()) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    int vector_len = _attributes->get_vector_len();
    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this; it handled everything via the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
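
// Worked example (illustrative): loading [rcx + rdx*4 + 8] into rax takes
// the disp8 path above. After the caller has emitted the 0x8B (movl r, a)
// opcode, emit_operand produces ModRM 0x44 ([01 reg 100] with reg = rax),
// SIB 0x91 (scale = 4, index = rdx, base = rcx), then the disp8 byte 0x08,
// for the full sequence 8B 44 91 08.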

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    if (xreg_enc > 15) {
      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
      return;
    }
  }
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x58: // addpd
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
    case 0xFE: // paddd
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those have prefix 0x0F and are handled when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte, since a
    // ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits, the REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions, so we don't need
    // to check for them in the product version.

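    // For example (illustrative): the byte pair C5 F8 starts a 2-byte VEX
    // prefix (bits [7:6] of the second byte are 11), while C5 45 would
    // decode in 32-bit mode as LDS with a ModRM of mod == 01. The assert
    // below relies on exactly this property.
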
    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x22: // pinsrd r, r/a, #8
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
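
// Usage sketch (illustrative): for a 5-byte "call rel32" (E8 xx xx xx xx)
// at address p, locate_operand(p, call32_operand) returns p + 1 (the start
// of the rel32 field) and locate_next_instruction(p) returns p + 5.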
1071 
1072 
1073 #ifdef ASSERT
1074 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
1075   address inst = inst_mark();
1076   assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
1077   address opnd;
1078 
1079   Relocation* r = rspec.reloc();
1080   if (r->type() == relocInfo::none) {
1081     return;
1082   } else if (r->is_call() || format == call32_operand) {
1083     // assert(format == imm32_operand, "cannot specify a nonzero format");
1084     opnd = locate_operand(inst, call32_operand);
1085   } else if (r->is_data()) {
1086     assert(format == imm_operand || format == disp32_operand
1087            LP64_ONLY(|| format == narrow_oop_operand), "format ok");
1088     opnd = locate_operand(inst, (WhichOperand)format);
1089   } else {
1090     assert(format == imm_operand, "cannot specify a format");
1091     return;
1092   }
1093   assert(opnd == pc(), "must put operand where relocs can find it");
1094 }
1095 #endif // ASSERT
1096 
1097 void Assembler::emit_operand32(Register reg, Address adr) {
1098   assert(reg->encoding() < 8, "no extended registers");
1099   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1100   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1101                adr._rspec);
1102 }
1103 
1104 void Assembler::emit_operand(Register reg, Address adr,
1105                              int rip_relative_correction) {
1106   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1107                adr._rspec,
1108                rip_relative_correction);
1109 }
1110 
1111 void Assembler::emit_operand(XMMRegister reg, Address adr) {
1112   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1113                adr._rspec);
1114 }
1115 
1116 // MMX operations
1117 void Assembler::emit_operand(MMXRegister reg, Address adr) {
1118   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1119   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
1120 }
1121 
1122 // work around gcc (3.2.1-7a) bug
1123 void Assembler::emit_operand(Address adr, MMXRegister reg) {
1124   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1125   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
1126 }
1127 
1128 
1129 void Assembler::emit_farith(int b1, int b2, int i) {
1130   assert(isByte(b1) && isByte(b2), "wrong opcode");
1131   assert(0 <= i &&  i < 8, "illegal stack offset");
1132   emit_int8(b1);
1133   emit_int8(b2 + i);
1134 }
1135 
1136 
1137 // Now the Assembler instructions (identical for 32/64 bits)
1138 
1139 void Assembler::adcl(Address dst, int32_t imm32) {
1140   InstructionMark im(this);
1141   prefix(dst);
1142   emit_arith_operand(0x81, rdx, dst, imm32);
1143 }
1144 
1145 void Assembler::adcl(Address dst, Register src) {
1146   InstructionMark im(this);
1147   prefix(dst, src);
1148   emit_int8(0x11);
1149   emit_operand(src, dst);
1150 }
1151 
1152 void Assembler::adcl(Register dst, int32_t imm32) {
1153   prefix(dst);
1154   emit_arith(0x81, 0xD0, dst, imm32);
1155 }
1156 
1157 void Assembler::adcl(Register dst, Address src) {
1158   InstructionMark im(this);
1159   prefix(src, dst);
1160   emit_int8(0x13);
1161   emit_operand(dst, src);
1162 }
1163 
1164 void Assembler::adcl(Register dst, Register src) {
1165   (void) prefix_and_encode(dst->encoding(), src->encoding());
1166   emit_arith(0x13, 0xC0, dst, src);
1167 }
1168 
1169 void Assembler::addl(Address dst, int32_t imm32) {
1170   InstructionMark im(this);
1171   prefix(dst);
1172   emit_arith_operand(0x81, rax, dst, imm32);
1173 }
1174 
1175 void Assembler::addb(Register dst, Register src) {
1176   (void)prefix_and_encode(dst->encoding(), src->encoding());
1177   emit_arith(0x02, 0xC0, dst, src);
1178 }
1179 
1180 void Assembler::addb(Address dst, int imm8) {
1181   InstructionMark im(this);
1182   prefix(dst);
1183   emit_int8((unsigned char)0x80);
1184   emit_operand(rax, dst, 1);
1185   emit_int8(imm8);
1186 }
1187 
1188 void Assembler::addw(Register dst, Register src) {
1189   (void)prefix_and_encode(dst->encoding(), src->encoding());
1190   emit_arith(0x03, 0xC0, dst, src);
1191 }
1192 
1193 void Assembler::addw(Address dst, int imm16) {
1194   InstructionMark im(this);
1195   emit_int8(0x66);
1196   prefix(dst);
1197   emit_int8((unsigned char)0x81);
1198   emit_operand(rax, dst, 2);
1199   emit_int16(imm16);
1200 }
1201 
1202 void Assembler::addl(Address dst, Register src) {
1203   InstructionMark im(this);
1204   prefix(dst, src);
1205   emit_int8(0x01);
1206   emit_operand(src, dst);
1207 }
1208 
1209 void Assembler::addl(Register dst, int32_t imm32) {
1210   prefix(dst);
1211   emit_arith(0x81, 0xC0, dst, imm32);
1212 }
1213 
1214 void Assembler::addl(Register dst, Address src) {
1215   InstructionMark im(this);
1216   prefix(src, dst);
1217   emit_int8(0x03);
1218   emit_operand(dst, src);
1219 }
1220 
1221 void Assembler::addl(Register dst, Register src) {
1222   (void) prefix_and_encode(dst->encoding(), src->encoding());
1223   emit_arith(0x03, 0xC0, dst, src);
1224 }
1225 
1226 void Assembler::addr_nop_4() {
1227   assert(UseAddressNop, "no CPU support");
1228   // 4 bytes: NOP DWORD PTR [EAX+0]
1229   emit_int8(0x0F);
1230   emit_int8(0x1F);
1231   emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
1232   emit_int8(0);    // 8-bits offset (1 byte)
1233 }
1234 
1235 void Assembler::addr_nop_5() {
1236   assert(UseAddressNop, "no CPU support");
1237   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
1238   emit_int8(0x0F);
1239   emit_int8(0x1F);
1240   emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
1241   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1242   emit_int8(0);    // 8-bits offset (1 byte)
1243 }
1244 
1245 void Assembler::addr_nop_7() {
1246   assert(UseAddressNop, "no CPU support");
1247   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
1248   emit_int8(0x0F);
1249   emit_int8(0x1F);
1250   emit_int8((unsigned char)0x80);
1251                    // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
1252   emit_int32(0);   // 32-bits offset (4 bytes)
1253 }
1254 
1255 void Assembler::addr_nop_8() {
1256   assert(UseAddressNop, "no CPU support");
1257   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
1258   emit_int8(0x0F);
1259   emit_int8(0x1F);
1260   emit_int8((unsigned char)0x84);
1261                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
1262   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1263   emit_int32(0);   // 32-bits offset (4 bytes)
1264 }
1265 
1266 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
1267   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1268   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1269   attributes.set_rex_vex_w_reverted();
1270   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1271   emit_int8(0x58);
1272   emit_int8((unsigned char)(0xC0 | encode));
1273 }
1274 
1275 void Assembler::addsd(XMMRegister dst, Address src) {
1276   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1277   InstructionMark im(this);
1278   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1279   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1280   attributes.set_rex_vex_w_reverted();
1281   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1282   emit_int8(0x58);
1283   emit_operand(dst, src);
1284 }
1285 
1286 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1287   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1288   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1289   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1290   emit_int8(0x58);
1291   emit_int8((unsigned char)(0xC0 | encode));
1292 }
1293 
1294 void Assembler::addss(XMMRegister dst, Address src) {
1295   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1296   InstructionMark im(this);
1297   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1298   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1299   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1300   emit_int8(0x58);
1301   emit_operand(dst, src);
1302 }
1303 
1304 void Assembler::aesdec(XMMRegister dst, Address src) {
1305   assert(VM_Version::supports_aes(), "");
1306   InstructionMark im(this);
1307   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1308   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1309   emit_int8((unsigned char)0xDE);
1310   emit_operand(dst, src);
1311 }
1312 
1313 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1314   assert(VM_Version::supports_aes(), "");
1315   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1316   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1317   emit_int8((unsigned char)0xDE);
1318   emit_int8(0xC0 | encode);
1319 }
1320 
1321 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1322   assert(VM_Version::supports_aes(), "");
1323   InstructionMark im(this);
1324   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1325   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1326   emit_int8((unsigned char)0xDF);
1327   emit_operand(dst, src);
1328 }
1329 
1330 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1331   assert(VM_Version::supports_aes(), "");
1332   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1333   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1334   emit_int8((unsigned char)0xDF);
1335   emit_int8((unsigned char)(0xC0 | encode));
1336 }
1337 
1338 void Assembler::aesenc(XMMRegister dst, Address src) {
1339   assert(VM_Version::supports_aes(), "");
1340   InstructionMark im(this);
1341   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1342   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1343   emit_int8((unsigned char)0xDC);
1344   emit_operand(dst, src);
1345 }
1346 
1347 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1348   assert(VM_Version::supports_aes(), "");
1349   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1350   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1351   emit_int8((unsigned char)0xDC);
1352   emit_int8(0xC0 | encode);
1353 }
1354 
1355 void Assembler::aesenclast(XMMRegister dst, Address src) {
1356   assert(VM_Version::supports_aes(), "");
1357   InstructionMark im(this);
1358   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1359   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1360   emit_int8((unsigned char)0xDD);
1361   emit_operand(dst, src);
1362 }
1363 
1364 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1365   assert(VM_Version::supports_aes(), "");
1366   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1367   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1368   emit_int8((unsigned char)0xDD);
1369   emit_int8((unsigned char)(0xC0 | encode));
1370 }
1371 
1372 void Assembler::andb(Register dst, Register src) {
1373   (void)prefix_and_encode(dst->encoding(), src->encoding());
1374   emit_arith(0x22, 0xC0, dst, src);
1375 }
1376 
1377 void Assembler::andw(Register dst, Register src) {
1378   (void)prefix_and_encode(dst->encoding(), src->encoding());
1379   emit_arith(0x23, 0xC0, dst, src);
1380 }
1381 
1382 void Assembler::andl(Address dst, int32_t imm32) {
1383   InstructionMark im(this);
1384   prefix(dst);
1385   emit_int8((unsigned char)0x81);
1386   emit_operand(rsp, dst, 4);
1387   emit_int32(imm32);
1388 }
1389 
1390 void Assembler::andl(Register dst, int32_t imm32) {
1391   prefix(dst);
1392   emit_arith(0x81, 0xE0, dst, imm32);
1393 }
1394 
1395 void Assembler::andl(Register dst, Address src) {
1396   InstructionMark im(this);
1397   prefix(src, dst);
1398   emit_int8(0x23);
1399   emit_operand(dst, src);
1400 }
1401 
1402 void Assembler::andl(Register dst, Register src) {
1403   (void) prefix_and_encode(dst->encoding(), src->encoding());
1404   emit_arith(0x23, 0xC0, dst, src);
1405 }
1406 
1407 void Assembler::andnl(Register dst, Register src1, Register src2) {
1408   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1409   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1410   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1411   emit_int8((unsigned char)0xF2);
1412   emit_int8((unsigned char)(0xC0 | encode));
1413 }
1414 
1415 void Assembler::andnl(Register dst, Register src1, Address src2) {
1416   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1417   InstructionMark im(this);
1418   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1419   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1420   emit_int8((unsigned char)0xF2);
1421   emit_operand(dst, src2);
1422 }
1423 
1424 void Assembler::bsfl(Register dst, Register src) {
1425   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1426   emit_int8(0x0F);
1427   emit_int8((unsigned char)0xBC);
1428   emit_int8((unsigned char)(0xC0 | encode));
1429 }
1430 
1431 void Assembler::bsrl(Register dst, Register src) {
1432   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1433   emit_int8(0x0F);
1434   emit_int8((unsigned char)0xBD);
1435   emit_int8((unsigned char)(0xC0 | encode));
1436 }
1437 
1438 void Assembler::bswapl(Register reg) { // bswap
1439   int encode = prefix_and_encode(reg->encoding());
1440   emit_int8(0x0F);
1441   emit_int8((unsigned char)(0xC8 | encode));
1442 }
1443 
1444 void Assembler::blsil(Register dst, Register src) {
1445   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1446   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1447   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1448   emit_int8((unsigned char)0xF3);
1449   emit_int8((unsigned char)(0xC0 | encode));
1450 }
1451 
1452 void Assembler::blsil(Register dst, Address src) {
1453   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1454   InstructionMark im(this);
1455   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1456   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1457   emit_int8((unsigned char)0xF3);
1458   emit_operand(rbx, src);
1459 }
1460 
1461 void Assembler::blsmskl(Register dst, Register src) {
1462   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1463   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1464   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1465   emit_int8((unsigned char)0xF3);
1466   emit_int8((unsigned char)(0xC0 | encode));
1467 }
1468 
1469 void Assembler::blsmskl(Register dst, Address src) {
1470   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1471   InstructionMark im(this);
1472   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1473   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1474   emit_int8((unsigned char)0xF3);
1475   emit_operand(rdx, src);
1476 }
1477 
1478 void Assembler::blsrl(Register dst, Register src) {
1479   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1480   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1481   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1482   emit_int8((unsigned char)0xF3);
1483   emit_int8((unsigned char)(0xC0 | encode));
1484 }
1485 
1486 void Assembler::blsrl(Register dst, Address src) {
1487   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1488   InstructionMark im(this);
1489   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1490   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1491   emit_int8((unsigned char)0xF3);
1492   emit_operand(rcx, src);
1493 }
1494 
1495 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1496   // suspect disp32 is always good
1497   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1498 
1499   if (L.is_bound()) {
1500     const int long_size = 5;
1501     int offs = (int)( target(L) - pc() );
1502     assert(offs <= 0, "assembler error");
1503     InstructionMark im(this);
1504     // 1110 1000 #32-bit disp
1505     emit_int8((unsigned char)0xE8);
1506     emit_data(offs - long_size, rtype, operand);
1507   } else {
1508     InstructionMark im(this);
1509     // 1110 1000 #32-bit disp
1510     L.add_patch_at(code(), locator());
1511 
1512     emit_int8((unsigned char)0xE8);
1513     emit_data(int(0), rtype, operand);
1514   }
1515 }
1516 
1517 void Assembler::call(Register dst) {
1518   int encode = prefix_and_encode(dst->encoding());
1519   emit_int8((unsigned char)0xFF);
1520   emit_int8((unsigned char)(0xD0 | encode));
1521 }
1522 
1523 
1524 void Assembler::call(Address adr) {
1525   InstructionMark im(this);
1526   prefix(adr);
1527   emit_int8((unsigned char)0xFF);
1528   emit_operand(rdx, adr);
1529 }
1530 
1531 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1532   InstructionMark im(this);
1533   emit_int8((unsigned char)0xE8);
1534   intptr_t disp = entry - (pc() + sizeof(int32_t));
1535   // Entry is NULL in case of a scratch emit.
1536   assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
1537   // Technically, should use call32_operand, but this format is
1538   // implied by the fact that we're emitting a call instruction.
1539 
1540   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1541   emit_data((int) disp, rspec, operand);
1542 }
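// Worked example (added for clarity; the addresses are made up): the rel32
// displacement is relative to the end of the 5-byte call. With the 0xE8 byte
// at 0x0FFF, pc() is 0x1000 when disp is computed, so for entry == 0x1100:
//   disp = 0x1100 - (0x1000 + 4) = 0xFC
// and the CPU resolves the target as 0x1004 + 0xFC == 0x1100 == entry.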
1543 
1544 void Assembler::cdql() {
1545   emit_int8((unsigned char)0x99);
1546 }
1547 
1548 void Assembler::cld() {
1549   emit_int8((unsigned char)0xFC);
1550 }
1551 
1552 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1553   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1554   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1555   emit_int8(0x0F);
1556   emit_int8(0x40 | cc);
1557   emit_int8((unsigned char)(0xC0 | encode));
1558 }
1559 
1560 
1561 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1562   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1563   prefix(src, dst);
1564   emit_int8(0x0F);
1565   emit_int8(0x40 | cc);
1566   emit_operand(dst, src);
1567 }
1568 
1569 void Assembler::cmpb(Address dst, int imm8) {
1570   InstructionMark im(this);
1571   prefix(dst);
1572   emit_int8((unsigned char)0x80);
1573   emit_operand(rdi, dst, 1);
1574   emit_int8(imm8);
1575 }
1576 
1577 void Assembler::cmpl(Address dst, int32_t imm32) {
1578   InstructionMark im(this);
1579   prefix(dst);
1580   emit_int8((unsigned char)0x81);
1581   emit_operand(rdi, dst, 4);
1582   emit_int32(imm32);
1583 }
1584 
1585 void Assembler::cmpl(Register dst, int32_t imm32) {
1586   prefix(dst);
1587   emit_arith(0x81, 0xF8, dst, imm32);
1588 }
1589 
1590 void Assembler::cmpl(Register dst, Register src) {
1591   (void) prefix_and_encode(dst->encoding(), src->encoding());
1592   emit_arith(0x3B, 0xC0, dst, src);
1593 }
1594 
1595 void Assembler::cmpl(Register dst, Address  src) {
1596   InstructionMark im(this);
1597   prefix(src, dst);
1598   emit_int8((unsigned char)0x3B);
1599   emit_operand(dst, src);
1600 }
1601 
1602 void Assembler::cmpw(Address dst, int imm16) {
1603   InstructionMark im(this);
1604   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1605   emit_int8(0x66);
1606   emit_int8((unsigned char)0x81);
1607   emit_operand(rdi, dst, 2);
1608   emit_int16(imm16);
1609 }
1610 
1611 // The 32-bit cmpxchg compares the value at adr with the contents of rax;
1612 // if they are equal, reg is stored into adr, otherwise the value at adr is loaded into rax.
1613 // The ZF flag is set if the compared values were equal, and cleared otherwise.
1614 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1615   InstructionMark im(this);
1616   prefix(adr, reg);
1617   emit_int8(0x0F);
1618   emit_int8((unsigned char)0xB1);
1619   emit_operand(reg, adr);
1620 }
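// Illustrative sketch (not part of the original sources): these primitives are
// typically combined into an atomic compare-and-swap. The names obj, offset,
// new_val, expected and retry are hypothetical, and `__` stands for a
// MacroAssembler-style pointer:
//
//   __ movl(rax, expected);                      // expected value must be in rax
//   __ lock();                                   // make the exchange atomic
//   __ cmpxchgl(new_val, Address(obj, offset));  // ZF set on success
//   __ jcc(Assembler::notEqual, retry);          // on failure rax holds the current value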
1621 
1622 // The 8-bit cmpxchg compares the value at adr with the contents of AL (the low byte of rax);
1623 // if they are equal, reg is stored into adr, otherwise the value at adr is loaded into AL.
1624 // The ZF flag is set if the compared values were equal, and cleared otherwise.
1625 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1626   InstructionMark im(this);
1627   prefix(adr, reg, true);
1628   emit_int8(0x0F);
1629   emit_int8((unsigned char)0xB0);
1630   emit_operand(reg, adr);
1631 }
1632 
1633 void Assembler::comisd(XMMRegister dst, Address src) {
1634   // NOTE: dbx seems to decode this as comiss even though the
1635   // 0x66 prefix is there. Strangely, ucomisd comes out correctly.
1636   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1637   InstructionMark im(this);
1638   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1639   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1640   attributes.set_rex_vex_w_reverted();
1641   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1642   emit_int8(0x2F);
1643   emit_operand(dst, src);
1644 }
1645 
1646 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1647   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1648   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1649   attributes.set_rex_vex_w_reverted();
1650   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1651   emit_int8(0x2F);
1652   emit_int8((unsigned char)(0xC0 | encode));
1653 }
1654 
1655 void Assembler::comiss(XMMRegister dst, Address src) {
1656   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1657   InstructionMark im(this);
1658   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1659   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1660   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1661   emit_int8(0x2F);
1662   emit_operand(dst, src);
1663 }
1664 
1665 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1666   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1667   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1668   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1669   emit_int8(0x2F);
1670   emit_int8((unsigned char)(0xC0 | encode));
1671 }
1672 
1673 void Assembler::cpuid() {
1674   emit_int8(0x0F);
1675   emit_int8((unsigned char)0xA2);
1676 }
1677 
1678 // Opcode / Instruction                    Op/En  64-Bit Mode  Compat/Leg Mode  Description                 Implemented
1679 // F2 0F 38 F0 /r        CRC32 r32, r/m8   RM     Valid        Valid            Accumulate CRC32 on r/m8.   v
1680 // F2 REX 0F 38 F0 /r    CRC32 r32, r/m8*  RM     Valid        N.E.             Accumulate CRC32 on r/m8.   -
1681 // F2 REX.W 0F 38 F0 /r  CRC32 r64, r/m8   RM     Valid        N.E.             Accumulate CRC32 on r/m8.   -
1682 //
1683 // F2 0F 38 F1 /r        CRC32 r32, r/m16  RM     Valid        Valid            Accumulate CRC32 on r/m16.  v
1684 //
1685 // F2 0F 38 F1 /r        CRC32 r32, r/m32  RM     Valid        Valid            Accumulate CRC32 on r/m32.  v
1686 //
1687 // F2 REX.W 0F 38 F1 /r  CRC32 r64, r/m64  RM     Valid        N.E.             Accumulate CRC32 on r/m64.  v
1688 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1689   assert(VM_Version::supports_sse4_2(), "");
1690   int8_t w = 0x01;
1691   Prefix p = Prefix_EMPTY;
1692 
1693   emit_int8((int8_t)0xF2);
1694   switch (sizeInBytes) {
1695   case 1:
1696     w = 0;
1697     break;
1698   case 2:
1699   case 4:
1700     break;
1701   LP64_ONLY(case 8:)
1702     // This instruction is not valid in 32-bit mode
1703     // Note:
1704     // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1705     //
1706     // Page B-72, Vol. 2C says
1707     //   qwreg2 to qwreg  1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
1708     //   mem64 to qwreg   1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r/m
1709     //                    (note the F0 above, where F1 is expected)
1710     // while page 3-208, Vol. 2A says
1711     //   F2 REX.W 0F 38 F1 /r  CRC32 r64, r/m64  RM  Valid  N.E.  Accumulate CRC32 on r/m64.
1712     //
1713     // The 0 in the last opcode bit is reserved for a different flavor of this instruction:
1714     //   F2 REX.W 0F 38 F0 /r  CRC32 r64, r/m8   RM  Valid  N.E.  Accumulate CRC32 on r/m8.
1715     p = REX_W;
1716     break;
1717   default:
1718     assert(0, "Unsupported value for a sizeInBytes argument");
1719     break;
1720   }
1721   LP64_ONLY(prefix(crc, v, p);)
1722   emit_int8((int8_t)0x0F);
1723   emit_int8(0x38);
1724   emit_int8((int8_t)(0xF0 | w));
1725   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1726 }
1727 
1728 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1729   assert(VM_Version::supports_sse4_2(), "");
1730   InstructionMark im(this);
1731   int8_t w = 0x01;
1732   Prefix p = Prefix_EMPTY;
1733 
1734   emit_int8((int8_t)0xF2);
1735   switch (sizeInBytes) {
1736   case 1:
1737     w = 0;
1738     break;
1739   case 2:
1740   case 4:
1741     break;
1742   LP64_ONLY(case 8:)
1743     // This instruction is not valid in 32-bit mode
1744     p = REX_W;
1745     break;
1746   default:
1747     assert(0, "Unsupported value for a sizeInBytes argument");
1748     break;
1749   }
1750   LP64_ONLY(prefix(crc, adr, p);)
1751   emit_int8((int8_t)0x0F);
1752   emit_int8(0x38);
1753   emit_int8((int8_t)(0xF0 | w));
1754   emit_operand(crc, adr);
1755 }
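// Illustrative sketch (not part of the original sources): accumulating CRC32-C
// over a byte buffer with the emitter above. The register names and the
// loop/done labels are hypothetical, and `__` stands for a MacroAssembler-style
// pointer:
//
//   __ bind(loop);
//   __ testl(len, len);                  // any bytes left?
//   __ jccb(Assembler::zero, done);
//   __ crc32(crc, Address(buf, 0), 1);   // crc = CRC32C(crc, byte at [buf])
//   __ addl(buf, 1);
//   __ subl(len, 1);
//   __ jmpb(loop);
//   __ bind(done);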
1756 
1757 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1758   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1759   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1760   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1761   emit_int8((unsigned char)0xE6);
1762   emit_int8((unsigned char)(0xC0 | encode));
1763 }
1764 
1765 void Assembler::vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1766   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1767   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
1768   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1769   emit_int8((unsigned char)0xE6);
1770   emit_int8((unsigned char)(0xC0 | encode));
1771 }
1772 
1773 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1774   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1775   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1776   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1777   emit_int8(0x5B);
1778   emit_int8((unsigned char)(0xC0 | encode));
1779 }
1780 
1781 void Assembler::vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1782   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1783   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
1784   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1785   emit_int8(0x5B);
1786   emit_int8((unsigned char)(0xC0 | encode));
1787 }
1788 
1789 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1790   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1791   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1792   attributes.set_rex_vex_w_reverted();
1793   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1794   emit_int8(0x5A);
1795   emit_int8((unsigned char)(0xC0 | encode));
1796 }
1797 
1798 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1799   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1800   InstructionMark im(this);
1801   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1802   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1803   attributes.set_rex_vex_w_reverted();
1804   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1805   emit_int8(0x5A);
1806   emit_operand(dst, src);
1807 }
1808 
1809 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1810   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1811   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1812   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1813   emit_int8(0x2A);
1814   emit_int8((unsigned char)(0xC0 | encode));
1815 }
1816 
1817 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1818   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1819   InstructionMark im(this);
1820   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1821   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1822   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1823   emit_int8(0x2A);
1824   emit_operand(dst, src);
1825 }
1826 
1827 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1828   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1829   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1830   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1831   emit_int8(0x2A);
1832   emit_int8((unsigned char)(0xC0 | encode));
1833 }
1834 
1835 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1836   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1837   InstructionMark im(this);
1838   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1839   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1840   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1841   emit_int8(0x2A);
1842   emit_operand(dst, src);
1843 }
1844 
1845 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1846   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1847   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1848   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1849   emit_int8(0x2A);
1850   emit_int8((unsigned char)(0xC0 | encode));
1851 }
1852 
1853 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1854   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1855   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1856   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1857   emit_int8(0x5A);
1858   emit_int8((unsigned char)(0xC0 | encode));
1859 }
1860 
1861 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1862   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1863   InstructionMark im(this);
1864   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1865   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1866   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1867   emit_int8(0x5A);
1868   emit_operand(dst, src);
1869 }
1870 
1871 
1872 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1873   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1874   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1875   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1876   emit_int8(0x2C);
1877   emit_int8((unsigned char)(0xC0 | encode));
1878 }
1879 
1880 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1881   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1882   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1883   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1884   emit_int8(0x2C);
1885   emit_int8((unsigned char)(0xC0 | encode));
1886 }
1887 
1888 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1889   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1890   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1891   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1892   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1893   emit_int8((unsigned char)0xE6);
1894   emit_int8((unsigned char)(0xC0 | encode));
1895 }
1896 
1897 void Assembler::vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1898   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
1899   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1900   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1901   emit_int8((unsigned char)0x5A);
1902   emit_int8((unsigned char)(0xC0 | encode));
1903 }
1904 
1905 void Assembler::evcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1906   assert(UseAVX > 2, "");
1907   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1908   attributes.set_is_evex_instruction();
1909   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1910   emit_int8((unsigned char)0x5A);
1911   emit_int8((unsigned char)(0xC0 | encode));
1912 }
1913 
1914 void Assembler::pabsb(XMMRegister dst, XMMRegister src) {
1915   assert(VM_Version::supports_ssse3(), "");
1916   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1917   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1918   emit_int8(0x1C);
1919   emit_int8((unsigned char)(0xC0 | encode));
1920 }
1921 
1922 void Assembler::pabsw(XMMRegister dst, XMMRegister src) {
1923   assert(VM_Version::supports_ssse3(), "");
1924   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1925   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1926   emit_int8(0x1D);
1927   emit_int8((unsigned char)(0xC0 | encode));
1928 }
1929 
1930 void Assembler::vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1931   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
1932   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1933   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1934   emit_int8((unsigned char)0x5A);
1935   emit_int8((unsigned char)(0xC0 | encode));
1936 }
1937 
1938 void Assembler::evcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1939   assert(UseAVX > 2, "");
1940   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1941   attributes.set_is_evex_instruction();
1942   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1943   emit_int8((unsigned char)0x5A);
1944   emit_int8((unsigned char)(0xC0 | encode));
1945 }
1946 
1947 void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1948   assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
1949   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1950   attributes.set_is_evex_instruction();
1951   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1952   emit_int8((unsigned char)0x5B);
1953   emit_int8((unsigned char)(0xC0 | encode));
1954 }
1955 
1956 void Assembler::evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1957   assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
1958   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1959   attributes.set_is_evex_instruction();
1960   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1961   emit_int8((unsigned char)0xE6);
1962   emit_int8((unsigned char)(0xC0 | encode));
1963 }
1964 
1965 void Assembler::evpmovwb(XMMRegister dst, XMMRegister src, int vector_len) {
1966   assert(UseAVX > 2, "");
1967   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1968   attributes.set_is_evex_instruction();
1969   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
1970   emit_int8((unsigned char)0x30);
1971   emit_int8((unsigned char)(0xC0 | encode));
1972 }
1973 
1974 void Assembler::evpmovdw(XMMRegister dst, XMMRegister src, int vector_len) {
1975   assert(UseAVX > 2, "");
1976   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1977   attributes.set_is_evex_instruction();
1978   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
1979   emit_int8((unsigned char)0x33);
1980   emit_int8((unsigned char)(0xC0 | encode));
1981 }
1982 
1983 void Assembler::evpmovdb(XMMRegister dst, XMMRegister src, int vector_len) {
1984   assert(UseAVX > 2, "");
1985   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1986   attributes.set_is_evex_instruction();
1987   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
1988   emit_int8((unsigned char)0x31);
1989   emit_int8((unsigned char)(0xC0 | encode));
1990 }
1991 
1992 void Assembler::evpmovqd(XMMRegister dst, XMMRegister src, int vector_len) {
1993   assert(UseAVX > 2, "");
1994   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1995   attributes.set_is_evex_instruction();
1996   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
1997   emit_int8((unsigned char)0x35);
1998   emit_int8((unsigned char)(0xC0 | encode));
1999 }
2000 
2001 void Assembler::evpmovqb(XMMRegister dst, XMMRegister src, int vector_len) {
2002   assert(UseAVX > 2, "");
2003   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2004   attributes.set_is_evex_instruction();
2005   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2006   emit_int8((unsigned char)0x32);
2007   emit_int8((unsigned char)(0xC0 | encode));
2008 }
2009 
2010 void Assembler::evpmovqw(XMMRegister dst, XMMRegister src, int vector_len) {
2011   assert(UseAVX > 2, "");
2012   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2013   attributes.set_is_evex_instruction();
2014   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2015   emit_int8((unsigned char)0x34);
2016   emit_int8((unsigned char)(0xC0 | encode));
2017 }
2018 
2019 void Assembler::pabsd(XMMRegister dst, XMMRegister src) {
2020   assert(VM_Version::supports_ssse3(), "");
2021   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2022   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
2023   emit_int8(0x1E);
2024   emit_int8((unsigned char)(0xC0 | encode));
2025 }
2026 
2027 void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
2028   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
2029   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2030   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
2031   emit_int8((unsigned char)0x1C);
2032   emit_int8((unsigned char)(0xC0 | encode));
2033 }
2034 
2035 void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
2036   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
2037   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2038   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
2039   emit_int8((unsigned char)0x1D);
2040   emit_int8((unsigned char)(0xC0 | encode));
2041 }
2042 
2043 void Assembler::vpabsd(XMMRegister dst, XMMRegister src, int vector_len) {
2044   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
2045   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2046   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
2047   emit_int8((unsigned char)0x1E);
2048   emit_int8((unsigned char)(0xC0 | encode));
2049 }
2050 
2051 void Assembler::evpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
2052   assert(UseAVX > 2, "");
2053   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2054   attributes.set_is_evex_instruction();
2055   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
2056   emit_int8((unsigned char)0x1C);
2057   emit_int8((unsigned char)(0xC0 | encode));
2058 }
2059 
2060 void Assembler::evpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
2061   assert(UseAVX > 2, "");
2062   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2063   attributes.set_is_evex_instruction();
2064   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
2065   emit_int8((unsigned char)0x1D);
2066   emit_int8((unsigned char)(0xC0 | encode));
2067 }
2068 
2069 void Assembler::evpabsd(XMMRegister dst, XMMRegister src, int vector_len) {
2070   assert(UseAVX > 2, "");
2071   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2072   attributes.set_is_evex_instruction();
2073   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
2074   emit_int8((unsigned char)0x1E);
2075   emit_int8((unsigned char)(0xC0 | encode));
2076 }
2077 
2078 void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) {
2079   assert(UseAVX > 2, "");
2080   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2081   attributes.set_is_evex_instruction();
2082   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
2083   emit_int8((unsigned char)0x1F);
2084   emit_int8((unsigned char)(0xC0 | encode));
2085 }
2086 
2087 void Assembler::decl(Address dst) {
2088   // Don't use it directly. Use MacroAssembler::decrement() instead.
2089   InstructionMark im(this);
2090   prefix(dst);
2091   emit_int8((unsigned char)0xFF);
2092   emit_operand(rcx, dst);
2093 }
2094 
2095 void Assembler::divsd(XMMRegister dst, Address src) {
2096   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2097   InstructionMark im(this);
2098   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2099   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2100   attributes.set_rex_vex_w_reverted();
2101   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2102   emit_int8(0x5E);
2103   emit_operand(dst, src);
2104 }
2105 
2106 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
2107   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2108   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2109   attributes.set_rex_vex_w_reverted();
2110   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2111   emit_int8(0x5E);
2112   emit_int8((unsigned char)(0xC0 | encode));
2113 }
2114 
2115 void Assembler::divss(XMMRegister dst, Address src) {
2116   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2117   InstructionMark im(this);
2118   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2119   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2120   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2121   emit_int8(0x5E);
2122   emit_operand(dst, src);
2123 }
2124 
2125 void Assembler::divss(XMMRegister dst, XMMRegister src) {
2126   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2127   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2128   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2129   emit_int8(0x5E);
2130   emit_int8((unsigned char)(0xC0 | encode));
2131 }
2132 
2133 void Assembler::emms() {
2134   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
2135   emit_int8(0x0F);
2136   emit_int8(0x77);
2137 }
2138 
2139 void Assembler::hlt() {
2140   emit_int8((unsigned char)0xF4);
2141 }
2142 
2143 void Assembler::idivl(Register src) {
2144   int encode = prefix_and_encode(src->encoding());
2145   emit_int8((unsigned char)0xF7);
2146   emit_int8((unsigned char)(0xF8 | encode));
2147 }
2148 
2149 void Assembler::divl(Register src) { // Unsigned
2150   int encode = prefix_and_encode(src->encoding());
2151   emit_int8((unsigned char)0xF7);
2152   emit_int8((unsigned char)(0xF0 | encode));
2153 }
2154 
2155 void Assembler::imull(Register src) {
2156   int encode = prefix_and_encode(src->encoding());
2157   emit_int8((unsigned char)0xF7);
2158   emit_int8((unsigned char)(0xE8 | encode));
2159 }
2160 
2161 void Assembler::imull(Register dst, Register src) {
2162   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2163   emit_int8(0x0F);
2164   emit_int8((unsigned char)0xAF);
2165   emit_int8((unsigned char)(0xC0 | encode));
2166 }
2167 
2168 
2169 void Assembler::imull(Register dst, Register src, int value) {
2170   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2171   if (is8bit(value)) {
2172     emit_int8(0x6B);
2173     emit_int8((unsigned char)(0xC0 | encode));
2174     emit_int8(value & 0xFF);
2175   } else {
2176     emit_int8(0x69);
2177     emit_int8((unsigned char)(0xC0 | encode));
2178     emit_int32(value);
2179   }
2180 }
2181 
2182 void Assembler::imull(Register dst, Address src) {
2183   InstructionMark im(this);
2184   prefix(src, dst);
2185   emit_int8(0x0F);
2186   emit_int8((unsigned char)0xAF);
2187   emit_operand(dst, src);
2188 }
2189 
2190 
2191 void Assembler::incl(Address dst) {
2192   // Don't use it directly. Use MacroAssembler::increment() instead.
2193   InstructionMark im(this);
2194   prefix(dst);
2195   emit_int8((unsigned char)0xFF);
2196   emit_operand(rax, dst);
2197 }
2198 
2199 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
2200   InstructionMark im(this);
2201   assert((0 <= cc) && (cc < 16), "illegal cc");
2202   if (L.is_bound()) {
2203     address dst = target(L);
2204     assert(dst != NULL, "jcc most probably wrong");
2205 
2206     const int short_size = 2;
2207     const int long_size = 6;
2208     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
2209     if (maybe_short && is8bit(offs - short_size)) {
2210       // 0111 tttn #8-bit disp
2211       emit_int8(0x70 | cc);
2212       emit_int8((offs - short_size) & 0xFF);
2213     } else {
2214       // 0000 1111 1000 tttn #32-bit disp
2215       assert(is_simm32(offs - long_size),
2216              "must be 32bit offset (call4)");
2217       emit_int8(0x0F);
2218       emit_int8((unsigned char)(0x80 | cc));
2219       emit_int32(offs - long_size);
2220     }
2221   } else {
2222     // Note: we could eliminate conditional jumps to this jump if the
2223     //       condition is the same; however, that seems to be a rather unlikely case.
2224     // Note: use jccb() if the label to be bound is very close, to get
2225     //       an 8-bit displacement.
2226     L.add_patch_at(code(), locator());
2227     emit_int8(0x0F);
2228     emit_int8((unsigned char)(0x80 | cc));
2229     emit_int32(0);
2230   }
2231 }
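// Illustrative sketch (not part of the original sources): a forward conditional
// branch takes the unbound path above -- a 6-byte 0F 8x rel32 is emitted with a
// zero displacement and back-patched when the label is bound. Names are
// hypothetical:
//
//   Label done;
//   __ jcc(Assembler::equal, done);   // emits 0F 84 00 00 00 00, records a patch
//   ...
//   __ bind(done);                    // binding rewrites the rel32 field
//
// For already-bound (backward) targets the code above picks the 2-byte 7x disp8
// form automatically when maybe_short is set and the offset fits in 8 bits.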
2232 
2233 void Assembler::jccb(Condition cc, Label& L) {
2234   if (L.is_bound()) {
2235     const int short_size = 2;
2236     address entry = target(L);
2237 #ifdef ASSERT
2238     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2239     intptr_t delta = short_branch_delta();
2240     if (delta != 0) {
2241       dist += (dist < 0 ? (-delta) :delta);
2242     }
2243     assert(is8bit(dist), "Displacement too large for a short jmp");
2244 #endif
2245     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2246     // 0111 tttn #8-bit disp
2247     emit_int8(0x70 | cc);
2248     emit_int8((offs - short_size) & 0xFF);
2249   } else {
2250     InstructionMark im(this);
2251     L.add_patch_at(code(), locator());
2252     emit_int8(0x70 | cc);
2253     emit_int8(0);
2254   }
2255 }
2256 
2257 void Assembler::jmp(Address adr) {
2258   InstructionMark im(this);
2259   prefix(adr);
2260   emit_int8((unsigned char)0xFF);
2261   emit_operand(rsp, adr);
2262 }
2263 
2264 void Assembler::jmp(Label& L, bool maybe_short) {
2265   if (L.is_bound()) {
2266     address entry = target(L);
2267     assert(entry != NULL, "jmp most probably wrong");
2268     InstructionMark im(this);
2269     const int short_size = 2;
2270     const int long_size = 5;
2271     intptr_t offs = entry - pc();
2272     if (maybe_short && is8bit(offs - short_size)) {
2273       emit_int8((unsigned char)0xEB);
2274       emit_int8((offs - short_size) & 0xFF);
2275     } else {
2276       emit_int8((unsigned char)0xE9);
2277       emit_int32(offs - long_size);
2278     }
2279   } else {
2280     // By default, forward jumps use 32-bit displacements, since we
2281     // can't yet know where the label will be bound.  If you're sure that
2282     // the forward jump will stay within the reach of an 8-bit displacement
2283     // (about 127 bytes), use jmpb to force the short form.
2284     InstructionMark im(this);
2285     L.add_patch_at(code(), locator());
2286     emit_int8((unsigned char)0xE9);
2287     emit_int32(0);
2288   }
2289 }
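// Illustrative sketch (not part of the original sources): forcing the short
// form on a forward jump when the writer can guarantee the distance. The label
// name is hypothetical:
//
//   Label skip;
//   __ jmpb(skip);              // 2-byte EB disp8; see the assert in jmpb()
//   ...                         // must stay within an 8-bit displacement
//   __ bind(skip);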
2290 
2291 void Assembler::jmp(Register entry) {
2292   int encode = prefix_and_encode(entry->encoding());
2293   emit_int8((unsigned char)0xFF);
2294   emit_int8((unsigned char)(0xE0 | encode));
2295 }
2296 
2297 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2298   InstructionMark im(this);
2299   emit_int8((unsigned char)0xE9);
2300   assert(dest != NULL, "must have a target");
2301   intptr_t disp = dest - (pc() + sizeof(int32_t));
2302   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2303   emit_data(disp, rspec.reloc(), call32_operand);
2304 }
2305 
2306 void Assembler::jmpb(Label& L) {
2307   if (L.is_bound()) {
2308     const int short_size = 2;
2309     address entry = target(L);
2310     assert(entry != NULL, "jmp most probably wrong");
2311 #ifdef ASSERT
2312     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2313     intptr_t delta = short_branch_delta();
2314     if (delta != 0) {
2315       dist += (dist < 0 ? (-delta) :delta);
2316     }
2317     assert(is8bit(dist), "Displacement too large for a short jmp");
2318 #endif
2319     intptr_t offs = entry - pc();
2320     emit_int8((unsigned char)0xEB);
2321     emit_int8((offs - short_size) & 0xFF);
2322   } else {
2323     InstructionMark im(this);
2324     L.add_patch_at(code(), locator());
2325     emit_int8((unsigned char)0xEB);
2326     emit_int8(0);
2327   }
2328 }
2329 
2330 void Assembler::ldmxcsr( Address src) {
2331   if (UseAVX > 0) {
2332     InstructionMark im(this);
2333     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2334     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2335     emit_int8((unsigned char)0xAE);
2336     emit_operand(as_Register(2), src);
2337   } else {
2338     NOT_LP64(assert(VM_Version::supports_sse(), ""));
2339     InstructionMark im(this);
2340     prefix(src);
2341     emit_int8(0x0F);
2342     emit_int8((unsigned char)0xAE);
2343     emit_operand(as_Register(2), src);
2344   }
2345 }
2346 
2347 void Assembler::leal(Register dst, Address src) {
2348   InstructionMark im(this);
2349 #ifdef _LP64
2350   emit_int8(0x67); // addr32
2351   prefix(src, dst);
2352 #endif // _LP64
2353   emit_int8((unsigned char)0x8D);
2354   emit_operand(dst, src);
2355 }
2356 
2357 void Assembler::lfence() {
2358   emit_int8(0x0F);
2359   emit_int8((unsigned char)0xAE);
2360   emit_int8((unsigned char)0xE8);
2361 }
2362 
2363 void Assembler::lock() {
2364   emit_int8((unsigned char)0xF0);
2365 }
2366 
2367 void Assembler::lzcntl(Register dst, Register src) {
2368   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2369   emit_int8((unsigned char)0xF3);
2370   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2371   emit_int8(0x0F);
2372   emit_int8((unsigned char)0xBD);
2373   emit_int8((unsigned char)(0xC0 | encode));
2374 }
2375 
2376 // Emit mfence instruction
2377 void Assembler::mfence() {
2378   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2379   emit_int8(0x0F);
2380   emit_int8((unsigned char)0xAE);
2381   emit_int8((unsigned char)0xF0);
2382 }
2383 
2384 void Assembler::mov(Register dst, Register src) {
2385   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2386 }
2387 
2388 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2389   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2390   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2391   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2392   attributes.set_rex_vex_w_reverted();
2393   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2394   emit_int8(0x28);
2395   emit_int8((unsigned char)(0xC0 | encode));
2396 }
2397 
2398 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2399   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2400   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2401   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2402   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2403   emit_int8(0x28);
2404   emit_int8((unsigned char)(0xC0 | encode));
2405 }
2406 
2407 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2408   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2409   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2410   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2411   emit_int8(0x16);
2412   emit_int8((unsigned char)(0xC0 | encode));
2413 }
2414 
2415 void Assembler::movb(Register dst, Address src) {
2416   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2417   InstructionMark im(this);
2418   prefix(src, dst, true);
2419   emit_int8((unsigned char)0x8A);
2420   emit_operand(dst, src);
2421 }
2422 
2423 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2424   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2425   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2426   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2427   attributes.set_rex_vex_w_reverted();
2428   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2429   emit_int8(0x12);
2430   emit_int8((unsigned char)(0xC0 | encode));
2431 }
2432 
2433 void Assembler::kmovbl(KRegister dst, Register src) {
2434   assert(VM_Version::supports_avx512dq(), "");
2435   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2436   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2437   emit_int8((unsigned char)0x92);
2438   emit_int8((unsigned char)(0xC0 | encode));
2439 }
2440 
2441 void Assembler::kmovbl(Register dst, KRegister src) {
2442   assert(VM_Version::supports_avx512dq(), "");
2443   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2444   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2445   emit_int8((unsigned char)0x93);
2446   emit_int8((unsigned char)(0xC0 | encode));
2447 }
2448 
2449 void Assembler::kmovwl(KRegister dst, Register src) {
2450   assert(VM_Version::supports_evex(), "");
2451   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2452   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2453   emit_int8((unsigned char)0x92);
2454   emit_int8((unsigned char)(0xC0 | encode));
2455 }
2456 
2457 void Assembler::kmovwl(Register dst, KRegister src) {
2458   assert(VM_Version::supports_evex(), "");
2459   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2460   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2461   emit_int8((unsigned char)0x93);
2462   emit_int8((unsigned char)(0xC0 | encode));
2463 }
2464 
2465 void Assembler::kmovwl(KRegister dst, Address src) {
2466   assert(VM_Version::supports_evex(), "");
2467   InstructionMark im(this);
2468   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2469   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2470   emit_int8((unsigned char)0x90);
2471   emit_operand((Register)dst, src);
2472 }
2473 
2474 void Assembler::kmovdl(KRegister dst, Register src) {
2475   assert(VM_Version::supports_avx512bw(), "");
2476   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2477   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2478   emit_int8((unsigned char)0x92);
2479   emit_int8((unsigned char)(0xC0 | encode));
2480 }
2481 
2482 void Assembler::kmovdl(Register dst, KRegister src) {
2483   assert(VM_Version::supports_avx512bw(), "");
2484   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2485   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2486   emit_int8((unsigned char)0x93);
2487   emit_int8((unsigned char)(0xC0 | encode));
2488 }
2489 
2490 void Assembler::kmovql(KRegister dst, KRegister src) {
2491   assert(VM_Version::supports_avx512bw(), "");
2492   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2493   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2494   emit_int8((unsigned char)0x90);
2495   emit_int8((unsigned char)(0xC0 | encode));
2496 }
2497 
2498 void Assembler::kmovql(KRegister dst, Address src) {
2499   assert(VM_Version::supports_avx512bw(), "");
2500   InstructionMark im(this);
2501   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2502   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2503   emit_int8((unsigned char)0x90);
2504   emit_operand((Register)dst, src);
2505 }
2506 
2507 void Assembler::kmovql(Address dst, KRegister src) {
2508   assert(VM_Version::supports_avx512bw(), "");
2509   InstructionMark im(this);
2510   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2511   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2512   emit_int8((unsigned char)0x90);
2513   emit_operand((Register)src, dst);
2514 }
2515 
2516 void Assembler::kmovql(KRegister dst, Register src) {
2517   assert(VM_Version::supports_avx512bw(), "");
2518   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2519   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2520   emit_int8((unsigned char)0x92);
2521   emit_int8((unsigned char)(0xC0 | encode));
2522 }
2523 
2524 void Assembler::kmovql(Register dst, KRegister src) {
2525   assert(VM_Version::supports_avx512bw(), "");
2526   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2527   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2528   emit_int8((unsigned char)0x93);
2529   emit_int8((unsigned char)(0xC0 | encode));
2530 }
2531 
2532 void Assembler::knotwl(KRegister dst, KRegister src) {
2533   assert(VM_Version::supports_evex(), "");
2534   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2535   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2536   emit_int8((unsigned char)0x44);
2537   emit_int8((unsigned char)(0xC0 | encode));
2538 }
2539 
2540 // This instruction sets ZF if the OR of the operands is all zeros, and CF if it is all ones
2541 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2542   assert(VM_Version::supports_avx512dq(), "");
2543   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2544   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2545   emit_int8((unsigned char)0x98);
2546   emit_int8((unsigned char)(0xC0 | encode));
2547 }
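// Illustrative sketch (not part of the original sources): a common use is to
// test whether any lane of a vector-compare mask matched and branch on the
// flags. Assumes a hypothetical preceding compare has written mask k1:
//
//   __ kortestbl(k1, k1);                 // ZF = ((k1 | k1) == 0)
//   __ jcc(Assembler::notZero, found);    // some lane matched; `found` is hypothetical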
2548 
2549 // This instruction sets ZF if the OR of the operands is all zeros, and CF if it is all ones
2550 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2551   assert(VM_Version::supports_evex(), "");
2552   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2553   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2554   emit_int8((unsigned char)0x98);
2555   emit_int8((unsigned char)(0xC0 | encode));
2556 }
2557 
2558 // This instruction sets ZF if the OR of the operands is all zeros, and CF if it is all ones
2559 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2560   assert(VM_Version::supports_avx512bw(), "");
2561   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2562   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2563   emit_int8((unsigned char)0x98);
2564   emit_int8((unsigned char)(0xC0 | encode));
2565 }
2566 
2567 // This instruction sets ZF if the OR of the operands is all zeros, and CF if it is all ones
2568 void Assembler::kortestql(KRegister src1, KRegister src2) {
2569   assert(VM_Version::supports_avx512bw(), "");
2570   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2571   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2572   emit_int8((unsigned char)0x98);
2573   emit_int8((unsigned char)(0xC0 | encode));
2574 }
2575 
2576 // This instruction sets ZF if (src1 AND src2) is all zeros, and CF if (src1 AND NOT src2) is all zeros
2577 void Assembler::ktestql(KRegister src1, KRegister src2) {
2578   assert(VM_Version::supports_avx512bw(), "");
2579   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2580   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2581   emit_int8((unsigned char)0x99);
2582   emit_int8((unsigned char)(0xC0 | encode));
2583 }
2584 
2585 void Assembler::ktestq(KRegister src1, KRegister src2) {
2586   assert(VM_Version::supports_avx512bw(), "");
2587   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2588   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2589   emit_int8((unsigned char)0x99);
2590   emit_int8((unsigned char)(0xC0 | encode));
2591 }
2592 
2593 void Assembler::ktestd(KRegister src1, KRegister src2) {
2594   assert(VM_Version::supports_avx512bw(), "");
2595   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2596   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2597   emit_int8((unsigned char)0x99);
2598   emit_int8((unsigned char)(0xC0 | encode));
2599 }
2600 
2601 void Assembler::movb(Address dst, int imm8) {
2602   InstructionMark im(this);
2603   prefix(dst);
2604   emit_int8((unsigned char)0xC6);
2605   emit_operand(rax, dst, 1);
2606   emit_int8(imm8);
2607 }
2608 
2609 
2610 void Assembler::movb(Address dst, Register src) {
2611   assert(src->has_byte_register(), "must have byte register");
2612   InstructionMark im(this);
2613   prefix(dst, src, true);
2614   emit_int8((unsigned char)0x88);
2615   emit_operand(src, dst);
2616 }
2617 
2618 void Assembler::movdl(XMMRegister dst, Register src) {
2619   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2620   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2621   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2622   emit_int8(0x6E);
2623   emit_int8((unsigned char)(0xC0 | encode));
2624 }
2625 
2626 void Assembler::movdl(Register dst, XMMRegister src) {
2627   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2628   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2629   // swap src/dst to get correct prefix (this MR-form encoding puts the xmm register in the ModRM reg field)
2630   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2631   emit_int8(0x7E);
2632   emit_int8((unsigned char)(0xC0 | encode));
2633 }
2634 
2635 void Assembler::movdl(XMMRegister dst, Address src) {
2636   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2637   InstructionMark im(this);
2638   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2639   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2640   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2641   emit_int8(0x6E);
2642   emit_operand(dst, src);
2643 }
2644 
2645 void Assembler::movdl(Address dst, XMMRegister src) {
2646   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2647   InstructionMark im(this);
2648   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2649   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2650   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2651   emit_int8(0x7E);
2652   emit_operand(src, dst);
2653 }
2654 
2655 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2656   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2657   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2658   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2659   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2660   emit_int8(0x6F);
2661   emit_int8((unsigned char)(0xC0 | encode));
2662 }
2663 
2664 void Assembler::movdqa(XMMRegister dst, Address src) {
2665   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2666   InstructionMark im(this);
2667   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2668   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2669   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2670   emit_int8(0x6F);
2671   emit_operand(dst, src);
2672 }
2673 
2674 void Assembler::movdqu(XMMRegister dst, Address src) {
2675   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2676   InstructionMark im(this);
2677   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2678   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2679   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2680   emit_int8(0x6F);
2681   emit_operand(dst, src);
2682 }
2683 
2684 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2685   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2686   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2687   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2688   emit_int8(0x6F);
2689   emit_int8((unsigned char)(0xC0 | encode));
2690 }
2691 
2692 void Assembler::movdqu(Address dst, XMMRegister src) {
2693   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2694   InstructionMark im(this);
2695   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2696   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2697   attributes.reset_is_clear_context();
2698   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2699   emit_int8(0x7F);
2700   emit_operand(src, dst);
2701 }
2702 
2703 // Move Unaligned 256-bit Vector
2704 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2705   assert(UseAVX > 0, "");
2706   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2707   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2708   emit_int8(0x6F);
2709   emit_int8((unsigned char)(0xC0 | encode));
2710 }
2711 
2712 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2713   assert(UseAVX > 0, "");
2714   InstructionMark im(this);
2715   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2716   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2717   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2718   emit_int8(0x6F);
2719   emit_operand(dst, src);
2720 }
2721 
2722 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2723   assert(UseAVX > 0, "");
2724   InstructionMark im(this);
2725   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2726   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2727   attributes.reset_is_clear_context();
2728   // swap src<->dst for encoding
2729   assert(src != xnoreg, "sanity");
2730   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2731   emit_int8(0x7F);
2732   emit_operand(src, dst);
2733 }
2734 
2735 // Move Unaligned EVEX-enabled Vector (element size programmable: 8, 16, 32, or 64 bits)
2736 void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) {
2737   assert(VM_Version::supports_evex(), "");
2738   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2739   attributes.set_is_evex_instruction();
2740   if (merge) {
2741     attributes.reset_is_clear_context();
2742   }
2743   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2744   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2745   emit_int8(0x6F);
2746   emit_int8((unsigned char)(0xC0 | encode));
2747 }
2748 
2749 void Assembler::evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) {
2750   assert(VM_Version::supports_evex(), "");
2751   InstructionMark im(this);
2752   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2753   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2754   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2755   attributes.set_is_evex_instruction();
2756   if (merge) {
2757     attributes.reset_is_clear_context();
2758   }
2759   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2760   emit_int8(0x6F);
2761   emit_operand(dst, src);
2762 }
2763 
2764 void Assembler::evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) {
2765   assert(VM_Version::supports_evex(), "");
2766   assert(src != xnoreg, "sanity");
2767   InstructionMark im(this);
2768   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2769   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2770   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2771   attributes.set_is_evex_instruction();
2772   if (merge) {
2773     attributes.reset_is_clear_context();
2774   }
2775   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2776   emit_int8(0x7F);
2777   emit_operand(src, dst);
2778 }
2779 
2780 void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
2781   assert(VM_Version::supports_avx512vlbw(), "");
2782   InstructionMark im(this);
2783   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2784   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2785   attributes.set_embedded_opmask_register_specifier(mask);
2786   attributes.set_is_evex_instruction();
2787   if (merge) {
2788     attributes.reset_is_clear_context();
2789   }
2790   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2791   emit_int8(0x6F);
2792   emit_operand(dst, src);
2793 }
2794 
2795 void Assembler::evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) {
2796   assert(VM_Version::supports_evex(), "");
2797   InstructionMark im(this);
2798   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2799   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2800   attributes.set_is_evex_instruction();
2801   if (merge) {
2802     attributes.reset_is_clear_context();
2803   }
2804   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2805   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2806   emit_int8(0x6F);
2807   emit_operand(dst, src);
2808 }
2809 
2810 void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
2811   assert(VM_Version::supports_avx512vlbw(), "");
2812   InstructionMark im(this);
2813   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2814   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2815   attributes.set_embedded_opmask_register_specifier(mask);
2816   attributes.set_is_evex_instruction();
2817   if (merge) {
2818     attributes.reset_is_clear_context();
2819   }
2820   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2821   emit_int8(0x6F);
2822   emit_operand(dst, src);
2823 }
2824 
2825 void Assembler::evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) {
2826   assert(VM_Version::supports_evex(), "");
2827   assert(src != xnoreg, "sanity");
2828   InstructionMark im(this);
2829   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2830   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2831   attributes.set_is_evex_instruction();
2832   if (merge) {
2833     attributes.reset_is_clear_context();
2834   }
2835   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2836   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2837   emit_int8(0x7F);
2838   emit_operand(src, dst);
2839 }
2840 
2841 void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2842   assert(VM_Version::supports_avx512vlbw(), "");
2843   assert(src != xnoreg, "sanity");
2844   InstructionMark im(this);
2845   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2846   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2847   attributes.set_embedded_opmask_register_specifier(mask);
2848   attributes.set_is_evex_instruction();
2849   if (merge) {
2850     attributes.reset_is_clear_context();
2851   }
2852   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2853   emit_int8(0x7F);
2854   emit_operand(src, dst);
2855 }
2856 
2857 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2858   // Users of this routine assume k1 usage.
2859   evmovdqul(dst, k1, src, /*merge*/ false, vector_len);
2860 }
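// Note: passing /*merge*/ false leaves the clear-context attribute set, so
// the EVEX form uses zeroing-masking under k1 rather than merging into dst.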
2861 
2862 void Assembler::evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2863   assert(VM_Version::supports_evex(), "");
2864   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2865   attributes.set_embedded_opmask_register_specifier(mask);
2866   attributes.set_is_evex_instruction();
2867   if (merge) {
2868     attributes.reset_is_clear_context();
2869   }
2870   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2871   emit_int8(0x6F);
2872   emit_int8((unsigned char)(0xC0 | encode));
2873 }
2874 
2875 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2876   // Users of this routine assume k1 usage.
2877   evmovdqul(dst, k1, src, /*merge*/ false, vector_len);
2878 }
2879 
2880 void Assembler::evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
2881   assert(VM_Version::supports_evex(), "");
2882   InstructionMark im(this);
2883   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
2884   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2885   attributes.set_embedded_opmask_register_specifier(mask);
2886   attributes.set_is_evex_instruction();
2887   if (merge) {
2888     attributes.reset_is_clear_context();
2889   }
2890   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2891   emit_int8(0x6F);
2892   emit_operand(dst, src);
2893 }
2894 
2895 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2896   // Users of this routine assume k1 usage.
2897   evmovdqul(dst, k1, src, /*merge*/ true, vector_len);
2898 }
2899 
2900 void Assembler::evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2901   assert(VM_Version::supports_evex(), "");
2902   assert(src != xnoreg, "sanity");
2903   InstructionMark im(this);
2904   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2905   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2906   attributes.set_embedded_opmask_register_specifier(mask);
2907   attributes.set_is_evex_instruction();
2908   if (merge) {
2909     attributes.reset_is_clear_context();
2910   }
2911   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2912   emit_int8(0x7F);
2913   emit_operand(src, dst);
2914 }
2915 
2916 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2917   // Users of this routine assume k1 usage.
2918   evmovdquq(dst, k1, src, /*merge*/ false, vector_len);
2919 }
2920 
2921 void Assembler::evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2922   assert(VM_Version::supports_evex(), "");
2923   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2924   attributes.set_embedded_opmask_register_specifier(mask);
2925   attributes.set_is_evex_instruction();
2926   if (merge) {
2927     attributes.reset_is_clear_context();
2928   }
2929   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2930   emit_int8(0x6F);
2931   emit_int8((unsigned char)(0xC0 | encode));
2932 }
2933 
2934 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2935   // Users of this routine assume k1 usage.
2936   evmovdquq(dst, k1, src, /*merge*/ false, vector_len);
2937 }
2938 
2939 void Assembler::evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
2940   assert(VM_Version::supports_evex(), "");
2941   InstructionMark im(this);
2942   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2943   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2944   attributes.set_embedded_opmask_register_specifier(mask);
2945   attributes.set_is_evex_instruction();
2946   if (merge) {
2947     attributes.reset_is_clear_context();
2948   }
2949   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2950   emit_int8(0x6F);
2951   emit_operand(dst, src);
2952 }
2953 
2954 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2955   // Users of this routine assume k1 usage.
2956   evmovdquq(dst, k1, src, /*merge*/ true, vector_len);
2957 }
2958 
2959 void Assembler::evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2960   assert(VM_Version::supports_evex(), "");
2961   assert(src != xnoreg, "sanity");
2962   InstructionMark im(this);
2963   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2964   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2965   attributes.set_embedded_opmask_register_specifier(mask);
2966   if (merge) {
2967     attributes.reset_is_clear_context();
2968   }
2969   attributes.set_is_evex_instruction();
2970   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2971   emit_int8(0x7F);
2972   emit_operand(src, dst);
2973 }
2974 
2975 // The 32-bit forms below zero-extend into the full register on 64-bit
2976 
2977 void Assembler::movl(Register dst, int32_t imm32) {
2978   int encode = prefix_and_encode(dst->encoding());
2979   emit_int8((unsigned char)(0xB8 | encode));
2980   emit_int32(imm32);
2981 }
2982 
2983 void Assembler::movl(Register dst, Register src) {
2984   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2985   emit_int8((unsigned char)0x8B);
2986   emit_int8((unsigned char)(0xC0 | encode));
2987 }
2988 
2989 void Assembler::movl(Register dst, Address src) {
2990   InstructionMark im(this);
2991   prefix(src, dst);
2992   emit_int8((unsigned char)0x8B);
2993   emit_operand(dst, src);
2994 }
2995 
2996 void Assembler::movl(Address dst, int32_t imm32) {
2997   InstructionMark im(this);
2998   prefix(dst);
2999   emit_int8((unsigned char)0xC7);
3000   emit_operand(rax, dst, 4);
3001   emit_int32(imm32);
3002 }
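// 0xC7 /0: rax supplies the /0 reg field here, and the trailing 4 tells
// emit_operand that four immediate bytes follow the operand (used to
// correct rip-relative displacements).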
3003 
3004 void Assembler::movl(Address dst, Register src) {
3005   InstructionMark im(this);
3006   prefix(dst, src);
3007   emit_int8((unsigned char)0x89);
3008   emit_operand(src, dst);
3009 }
3010 
3011 // Newer CPUs require movsd and movss to avoid a partial register stall
3012 // when loading from memory, but old Opteron should use movlpd instead of movsd.
3013 // The selection is done in MacroAssembler::movdbl() and movflt().
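// A minimal sketch of that selection (illustrative only; it assumes the
// UseXmmLoadAndClearUpper flag guards the choice):
//
//   void MacroAssembler::movdbl(XMMRegister dst, Address src) {
//     if (UseXmmLoadAndClearUpper) {
//       movsd(dst, src);   // newer CPUs: full destination write, no stall
//     } else {
//       movlpd(dst, src);  // old Opteron: avoid the movsd penalty
//     }
//   }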
3014 void Assembler::movlpd(XMMRegister dst, Address src) {
3015   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3016   InstructionMark im(this);
3017   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3018   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3019   attributes.set_rex_vex_w_reverted();
3020   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3021   emit_int8(0x12);
3022   emit_operand(dst, src);
3023 }
3024 
3025 void Assembler::movq( MMXRegister dst, Address src ) {
3026   assert( VM_Version::supports_mmx(), "" );
3027   emit_int8(0x0F);
3028   emit_int8(0x6F);
3029   emit_operand(dst, src);
3030 }
3031 
3032 void Assembler::movq( Address dst, MMXRegister src ) {
3033   assert( VM_Version::supports_mmx(), "" );
3034   emit_int8(0x0F);
3035   emit_int8(0x7F);
3036   // workaround gcc (3.2.1-7a) bug
3037   // In that version of gcc, with only an emit_operand(MMX, Address)
3038   // available, gcc would tail-jump and try to reverse the parameters,
3039   // completely obliterating dst in the process. By having a version
3040   // that doesn't need to swap the args at the tail jump, the bug is
3041   // avoided.
3042   emit_operand(dst, src);
3043 }
3044 
3045 void Assembler::movq(XMMRegister dst, Address src) {
3046   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3047   InstructionMark im(this);
3048   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3049   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3050   attributes.set_rex_vex_w_reverted();
3051   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3052   emit_int8(0x7E);
3053   emit_operand(dst, src);
3054 }
3055 
3056 void Assembler::movq(Address dst, XMMRegister src) {
3057   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3058   InstructionMark im(this);
3059   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3060   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3061   attributes.set_rex_vex_w_reverted();
3062   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3063   emit_int8((unsigned char)0xD6);
3064   emit_operand(src, dst);
3065 }
3066 
3067 void Assembler::movq(Register dst, XMMRegister src) {
3068   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3069   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3070   // swap src/dst to get correct prefix
3071   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3072   emit_int8(0x7E);
3073   emit_int8((unsigned char)(0xC0 | encode));
3074 }
3075 
3076 void Assembler::movq(XMMRegister dst, Register src) {
3077   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3078   InstructionAttr attributes(AVX_128bit, /* rex_w */true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3079   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3080   emit_int8(0x6E);
3081   emit_int8((unsigned char)(0xC0 | encode));
3082 }
3083 
3084 void Assembler::movsbl(Register dst, Address src) { // movsxb
3085   InstructionMark im(this);
3086   prefix(src, dst);
3087   emit_int8(0x0F);
3088   emit_int8((unsigned char)0xBE);
3089   emit_operand(dst, src);
3090 }
3091 
3092 void Assembler::movsbl(Register dst, Register src) { // movsxb
3093   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
3094   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
3095   emit_int8(0x0F);
3096   emit_int8((unsigned char)0xBE);
3097   emit_int8((unsigned char)(0xC0 | encode));
3098 }
3099 
3100 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
3101   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3102   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3103   attributes.set_rex_vex_w_reverted();
3104   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3105   emit_int8(0x10);
3106   emit_int8((unsigned char)(0xC0 | encode));
3107 }
3108 
3109 void Assembler::movsd(XMMRegister dst, Address src) {
3110   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3111   InstructionMark im(this);
3112   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3113   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3114   attributes.set_rex_vex_w_reverted();
3115   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3116   emit_int8(0x10);
3117   emit_operand(dst, src);
3118 }
3119 
3120 void Assembler::movsd(Address dst, XMMRegister src) {
3121   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3122   InstructionMark im(this);
3123   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3124   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3125   attributes.reset_is_clear_context();
3126   attributes.set_rex_vex_w_reverted();
3127   simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3128   emit_int8(0x11);
3129   emit_operand(src, dst);
3130 }
3131 
3132 void Assembler::movss(XMMRegister dst, XMMRegister src) {
3133   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3134   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3135   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3136   emit_int8(0x10);
3137   emit_int8((unsigned char)(0xC0 | encode));
3138 }
3139 
3140 void Assembler::movss(XMMRegister dst, Address src) {
3141   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3142   InstructionMark im(this);
3143   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3144   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3145   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3146   emit_int8(0x10);
3147   emit_operand(dst, src);
3148 }
3149 
3150 void Assembler::movss(Address dst, XMMRegister src) {
3151   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3152   InstructionMark im(this);
3153   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3154   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3155   attributes.reset_is_clear_context();
3156   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3157   emit_int8(0x11);
3158   emit_operand(src, dst);
3159 }
3160 
3161 void Assembler::movswl(Register dst, Address src) { // movsxw
3162   InstructionMark im(this);
3163   prefix(src, dst);
3164   emit_int8(0x0F);
3165   emit_int8((unsigned char)0xBF);
3166   emit_operand(dst, src);
3167 }
3168 
3169 void Assembler::movswl(Register dst, Register src) { // movsxw
3170   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3171   emit_int8(0x0F);
3172   emit_int8((unsigned char)0xBF);
3173   emit_int8((unsigned char)(0xC0 | encode));
3174 }
3175 
3176 void Assembler::movw(Address dst, int imm16) {
3177   InstructionMark im(this);
3178 
3179   emit_int8(0x66); // switch to 16-bit mode
3180   prefix(dst);
3181   emit_int8((unsigned char)0xC7);
3182   emit_operand(rax, dst, 2);
3183   emit_int16(imm16);
3184 }
3185 
3186 void Assembler::movw(Register dst, Address src) {
3187   InstructionMark im(this);
3188   emit_int8(0x66);
3189   prefix(src, dst);
3190   emit_int8((unsigned char)0x8B);
3191   emit_operand(dst, src);
3192 }
3193 
3194 void Assembler::movw(Address dst, Register src) {
3195   InstructionMark im(this);
3196   emit_int8(0x66);
3197   prefix(dst, src);
3198   emit_int8((unsigned char)0x89);
3199   emit_operand(src, dst);
3200 }
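// In the three movw forms above, the leading 0x66 operand-size prefix turns
// the 32-bit 0x8B/0x89/0xC7 encodings into their 16-bit variants, which is
// why movw(Address, int) emits an imm16 rather than an imm32.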
3201 
3202 void Assembler::movzbl(Register dst, Address src) { // movzxb
3203   InstructionMark im(this);
3204   prefix(src, dst);
3205   emit_int8(0x0F);
3206   emit_int8((unsigned char)0xB6);
3207   emit_operand(dst, src);
3208 }
3209 
3210 void Assembler::movzbl(Register dst, Register src) { // movzxb
3211   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
3212   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
3213   emit_int8(0x0F);
3214   emit_int8((unsigned char)0xB6);
3215   emit_int8(0xC0 | encode);
3216 }
3217 
3218 void Assembler::movzwl(Register dst, Address src) { // movzxw
3219   InstructionMark im(this);
3220   prefix(src, dst);
3221   emit_int8(0x0F);
3222   emit_int8((unsigned char)0xB7);
3223   emit_operand(dst, src);
3224 }
3225 
3226 void Assembler::movzwl(Register dst, Register src) { // movzxw
3227   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3228   emit_int8(0x0F);
3229   emit_int8((unsigned char)0xB7);
3230   emit_int8(0xC0 | encode);
3231 }
3232 
3233 void Assembler::mull(Address src) {
3234   InstructionMark im(this);
3235   prefix(src);
3236   emit_int8((unsigned char)0xF7);
3237   emit_operand(rsp, src);
3238 }
3239 
3240 void Assembler::mull(Register src) {
3241   int encode = prefix_and_encode(src->encoding());
3242   emit_int8((unsigned char)0xF7);
3243   emit_int8((unsigned char)(0xE0 | encode));
3244 }
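// Both mull forms encode F7 /4, an unsigned multiply of eax by the operand
// with the product left in edx:eax; rsp and 0xE0 above merely supply the /4
// reg field of the ModRM byte.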
3245 
3246 void Assembler::mulsd(XMMRegister dst, Address src) {
3247   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3248   InstructionMark im(this);
3249   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3250   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3251   attributes.set_rex_vex_w_reverted();
3252   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3253   emit_int8(0x59);
3254   emit_operand(dst, src);
3255 }
3256 
3257 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
3258   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3259   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3260   attributes.set_rex_vex_w_reverted();
3261   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3262   emit_int8(0x59);
3263   emit_int8((unsigned char)(0xC0 | encode));
3264 }
3265 
3266 void Assembler::mulss(XMMRegister dst, Address src) {
3267   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3268   InstructionMark im(this);
3269   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3270   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3271   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3272   emit_int8(0x59);
3273   emit_operand(dst, src);
3274 }
3275 
3276 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
3277   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3278   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3279   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3280   emit_int8(0x59);
3281   emit_int8((unsigned char)(0xC0 | encode));
3282 }
3283 
3284 void Assembler::negl(Register dst) {
3285   int encode = prefix_and_encode(dst->encoding());
3286   emit_int8((unsigned char)0xF7);
3287   emit_int8((unsigned char)(0xD8 | encode));
3288 }
3289 
3290 void Assembler::nop(int i) {
3291 #ifdef ASSERT
3292   assert(i > 0, " ");
3293   // The multi-byte nops aren't currently recognized by debuggers, which
3294   // makes it a pain to disassemble code while debugging. With asserts on,
3295   // speed is clearly not an issue, so simply use the traditional
3296   // single-byte nop for alignment.
3297 
3298   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
3299   return;
3300 
3301 #endif // ASSERT
3302 
3303   if (UseAddressNop && VM_Version::is_intel()) {
3304     //
3305     // Using multi-byte nops "0x0F 0x1F [address]" for Intel
3306     //  1: 0x90
3307     //  2: 0x66 0x90
3308     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3309     //  4: 0x0F 0x1F 0x40 0x00
3310     //  5: 0x0F 0x1F 0x44 0x00 0x00
3311     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3312     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3313     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3314     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3315     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3316     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3317 
3318     // The remaining encodings are Intel-specific - don't use consecutive address nops
3319 
3320     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3321     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3322     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3323     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
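    // Worked example: nop(17) emits one 15-byte sequence from the table
    // above (0x66 0x66 0x66, addr_nop_8, 0x66 0x66 0x66 0x90) and then,
    // with i == 2 left over, falls through the switch to emit 0x66 0x90.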
3324 
3325     while(i >= 15) {
3326       // For Intel don't generate consecutive address nops (mix with regular nops)
3327       i -= 15;
3328       emit_int8(0x66);   // size prefix
3329       emit_int8(0x66);   // size prefix
3330       emit_int8(0x66);   // size prefix
3331       addr_nop_8();
3332       emit_int8(0x66);   // size prefix
3333       emit_int8(0x66);   // size prefix
3334       emit_int8(0x66);   // size prefix
3335       emit_int8((unsigned char)0x90);
3336                          // nop
3337     }
3338     switch (i) {
3339       case 14:
3340         emit_int8(0x66); // size prefix
3341       case 13:
3342         emit_int8(0x66); // size prefix
3343       case 12:
3344         addr_nop_8();
3345         emit_int8(0x66); // size prefix
3346         emit_int8(0x66); // size prefix
3347         emit_int8(0x66); // size prefix
3348         emit_int8((unsigned char)0x90);
3349                          // nop
3350         break;
3351       case 11:
3352         emit_int8(0x66); // size prefix
3353       case 10:
3354         emit_int8(0x66); // size prefix
3355       case 9:
3356         emit_int8(0x66); // size prefix
3357       case 8:
3358         addr_nop_8();
3359         break;
3360       case 7:
3361         addr_nop_7();
3362         break;
3363       case 6:
3364         emit_int8(0x66); // size prefix
3365       case 5:
3366         addr_nop_5();
3367         break;
3368       case 4:
3369         addr_nop_4();
3370         break;
3371       case 3:
3372         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3373         emit_int8(0x66); // size prefix
3374       case 2:
3375         emit_int8(0x66); // size prefix
3376       case 1:
3377         emit_int8((unsigned char)0x90);
3378                          // nop
3379         break;
3380       default:
3381         assert(i == 0, " ");
3382     }
3383     return;
3384   }
3385   if (UseAddressNop && VM_Version::is_amd()) {
3386     //
3387     // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
3388     //  1: 0x90
3389     //  2: 0x66 0x90
3390     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3391     //  4: 0x0F 0x1F 0x40 0x00
3392     //  5: 0x0F 0x1F 0x44 0x00 0x00
3393     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3394     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3395     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3396     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3397     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3398     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3399 
3400     // The remaining encodings are AMD-specific - use consecutive address nops
3401 
3402     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3403     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3404     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3405     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3406     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3407     //     Size prefixes (0x66) are added for larger sizes
3408 
3409     while(i >= 22) {
3410       i -= 11;
3411       emit_int8(0x66); // size prefix
3412       emit_int8(0x66); // size prefix
3413       emit_int8(0x66); // size prefix
3414       addr_nop_8();
3415     }
3416     // Generate the first nop for sizes 12-21
3417     switch (i) {
3418       case 21:
3419         i -= 1;
3420         emit_int8(0x66); // size prefix
3421       case 20:
3422       case 19:
3423         i -= 1;
3424         emit_int8(0x66); // size prefix
3425       case 18:
3426       case 17:
3427         i -= 1;
3428         emit_int8(0x66); // size prefix
3429       case 16:
3430       case 15:
3431         i -= 8;
3432         addr_nop_8();
3433         break;
3434       case 14:
3435       case 13:
3436         i -= 7;
3437         addr_nop_7();
3438         break;
3439       case 12:
3440         i -= 6;
3441         emit_int8(0x66); // size prefix
3442         addr_nop_5();
3443         break;
3444       default:
3445         assert(i < 12, " ");
3446     }
3447 
3448     // Generate the second nop for sizes 1-11
3449     switch (i) {
3450       case 11:
3451         emit_int8(0x66); // size prefix
3452       case 10:
3453         emit_int8(0x66); // size prefix
3454       case 9:
3455         emit_int8(0x66); // size prefix
3456       case 8:
3457         addr_nop_8();
3458         break;
3459       case 7:
3460         addr_nop_7();
3461         break;
3462       case 6:
3463         emit_int8(0x66); // size prefix
3464       case 5:
3465         addr_nop_5();
3466         break;
3467       case 4:
3468         addr_nop_4();
3469         break;
3470       case 3:
3471         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3472         emit_int8(0x66); // size prefix
3473       case 2:
3474         emit_int8(0x66); // size prefix
3475       case 1:
3476         emit_int8((unsigned char)0x90);
3477                          // nop
3478         break;
3479       default:
3480         assert(i == 0, " ");
3481     }
3482     return;
3483   }
3484 
3485   if (UseAddressNop && VM_Version::is_zx()) {
3486     //
3487     // Using multi-byte nops "0x0F 0x1F [address]" for ZX
3488     //  1: 0x90
3489     //  2: 0x66 0x90
3490     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3491     //  4: 0x0F 0x1F 0x40 0x00
3492     //  5: 0x0F 0x1F 0x44 0x00 0x00
3493     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3494     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3495     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3496     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3497     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3498     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3499 
3500     // The remaining encodings are ZX-specific - don't use consecutive address nops
3501 
3502     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3503     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3504     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3505     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3506 
3507     while (i >= 15) {
3508       // For ZX don't generate consecutive address nops (mix with regular nops)
3509       i -= 15;
3510       emit_int8(0x66);   // size prefix
3511       emit_int8(0x66);   // size prefix
3512       emit_int8(0x66);   // size prefix
3513       addr_nop_8();
3514       emit_int8(0x66);   // size prefix
3515       emit_int8(0x66);   // size prefix
3516       emit_int8(0x66);   // size prefix
3517       emit_int8((unsigned char)0x90);
3518                          // nop
3519     }
3520     switch (i) {
3521       case 14:
3522         emit_int8(0x66); // size prefix
3523       case 13:
3524         emit_int8(0x66); // size prefix
3525       case 12:
3526         addr_nop_8();
3527         emit_int8(0x66); // size prefix
3528         emit_int8(0x66); // size prefix
3529         emit_int8(0x66); // size prefix
3530         emit_int8((unsigned char)0x90);
3531                          // nop
3532         break;
3533       case 11:
3534         emit_int8(0x66); // size prefix
3535       case 10:
3536         emit_int8(0x66); // size prefix
3537       case 9:
3538         emit_int8(0x66); // size prefix
3539       case 8:
3540         addr_nop_8();
3541         break;
3542       case 7:
3543         addr_nop_7();
3544         break;
3545       case 6:
3546         emit_int8(0x66); // size prefix
3547       case 5:
3548         addr_nop_5();
3549         break;
3550       case 4:
3551         addr_nop_4();
3552         break;
3553       case 3:
3554         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3555         emit_int8(0x66); // size prefix
3556       case 2:
3557         emit_int8(0x66); // size prefix
3558       case 1:
3559         emit_int8((unsigned char)0x90);
3560                          // nop
3561         break;
3562       default:
3563         assert(i == 0, " ");
3564     }
3565     return;
3566   }
3567 
3568   // Using nops with size prefixes "0x66 0x90".
3569   // From AMD Optimization Guide:
3570   //  1: 0x90
3571   //  2: 0x66 0x90
3572   //  3: 0x66 0x66 0x90
3573   //  4: 0x66 0x66 0x66 0x90
3574   //  5: 0x66 0x66 0x90 0x66 0x90
3575   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3576   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3577   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3578   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3579   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3580   //
3581   while(i > 12) {
3582     i -= 4;
3583     emit_int8(0x66); // size prefix
3584     emit_int8(0x66);
3585     emit_int8(0x66);
3586     emit_int8((unsigned char)0x90);
3587                      // nop
3588   }
3589   // 1 - 12 nops
3590   if(i > 8) {
3591     if(i > 9) {
3592       i -= 1;
3593       emit_int8(0x66);
3594     }
3595     i -= 3;
3596     emit_int8(0x66);
3597     emit_int8(0x66);
3598     emit_int8((unsigned char)0x90);
3599   }
3600   // 1 - 8 nops
3601   if(i > 4) {
3602     if(i > 6) {
3603       i -= 1;
3604       emit_int8(0x66);
3605     }
3606     i -= 3;
3607     emit_int8(0x66);
3608     emit_int8(0x66);
3609     emit_int8((unsigned char)0x90);
3610   }
3611   switch (i) {
3612     case 4:
3613       emit_int8(0x66);
3614     case 3:
3615       emit_int8(0x66);
3616     case 2:
3617       emit_int8(0x66);
3618     case 1:
3619       emit_int8((unsigned char)0x90);
3620       break;
3621     default:
3622       assert(i == 0, " ");
3623   }
3624 }
3625 
3626 void Assembler::notl(Register dst) {
3627   int encode = prefix_and_encode(dst->encoding());
3628   emit_int8((unsigned char)0xF7);
3629   emit_int8((unsigned char)(0xD0 | encode));
3630 }
3631 
3632 void Assembler::orl(Address dst, int32_t imm32) {
3633   InstructionMark im(this);
3634   prefix(dst);
3635   emit_arith_operand(0x81, rcx, dst, imm32);
3636 }
3637 
3638 void Assembler::orl(Register dst, int32_t imm32) {
3639   prefix(dst);
3640   emit_arith(0x81, 0xC8, dst, imm32);
3641 }
3642 
3643 void Assembler::orl(Register dst, Address src) {
3644   InstructionMark im(this);
3645   prefix(src, dst);
3646   emit_int8(0x0B);
3647   emit_operand(dst, src);
3648 }
3649 
3650 void Assembler::orl(Register dst, Register src) {
3651   (void) prefix_and_encode(dst->encoding(), src->encoding());
3652   emit_arith(0x0B, 0xC0, dst, src);
3653 }
3654 
3655 void Assembler::orl(Address dst, Register src) {
3656   InstructionMark im(this);
3657   prefix(dst, src);
3658   emit_int8(0x09);
3659   emit_operand(src, dst);
3660 }
3661 
3662 void Assembler::packuswb(XMMRegister dst, Address src) {
3663   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3664   assert((UseAVX > 0), "SSE mode requires 16-byte address alignment");
3665   InstructionMark im(this);
3666   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3667   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3668   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3669   emit_int8(0x67);
3670   emit_operand(dst, src);
3671 }
3672 
3673 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
3674   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3675   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3676   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3677   emit_int8(0x67);
3678   emit_int8((unsigned char)(0xC0 | encode));
3679 }
3680 
3681 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3682   assert(UseAVX > 0, "some form of AVX must be enabled");
3683   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3684   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3685   emit_int8(0x67);
3686   emit_int8((unsigned char)(0xC0 | encode));
3687 }
3688 
3689 void Assembler::vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3690   assert(UseAVX > 0, "some form of AVX must be enabled");
3691   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3692   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3693   emit_int8(0x2B);
3694   emit_int8((unsigned char)(0xC0 | encode));
3695 }
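// packuswb (66 0F 67) narrows signed words to unsigned bytes with unsigned
// saturation; vpackusdw (66 0F 38 2B) likewise narrows dwords to words.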
3696 
3697 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3698   assert(VM_Version::supports_avx2(), "");
3699   // VEX.256.66.0F3A.W1 00 /r ib
3700   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3701   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3702   emit_int8(0x00);
3703   emit_int8(0xC0 | encode);
3704   emit_int8(imm8);
3705 }
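// imm8 is read two bits per destination qword. For example,
// vpermq(dst, src, 0x4E, AVX_256bit) selects qwords {2, 3, 0, 1}, i.e. it
// swaps the two 128-bit halves of src.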
3706 
3707 void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3708   assert(VM_Version::supports_avx2(), "");
3709   // VEX.NDS.256.66.0F38.W0 36 /r
3710   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3711   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3712   emit_int8(0x36);
3713   emit_int8(0xC0 | encode);
3714 }
3715 
3716 void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src) {
3717   assert(VM_Version::supports_avx2(), "");
3718   // VEX.NDS.256.66.0F38.W0 36 /r
3719   InstructionMark im(this);
3720   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3721   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3722   emit_int8(0x36);
3723   emit_operand(dst, src);
3724 }
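// For vpermd, nds holds the dword indices: dst[i] = src[nds[i]]. With
// nds = {7, 6, 5, 4, 3, 2, 1, 0}, the eight dwords of src are reversed.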
3725 
3726 void Assembler::vperm2i128(XMMRegister dst,  XMMRegister nds, XMMRegister src, int imm8) {
3727   assert(VM_Version::supports_avx2(), "");
3728   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3729   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3730   emit_int8(0x46);
3731   emit_int8(0xC0 | encode);
3732   emit_int8(imm8);
3733 }
3734 
3735 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3736   assert(VM_Version::supports_avx(), "");
3737   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3738   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3739   emit_int8(0x06);
3740   emit_int8(0xC0 | encode);
3741   emit_int8(imm8);
3742 }
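// The imm8 of vperm2i128/vperm2f128 selects a 128-bit half per destination
// lane (0/1 pick the halves of nds, 2/3 the halves of src; bits 3 and 7
// zero the lane instead). For example, imm8 = 0x20 gives
// dst = {nds.lo, src.lo} and imm8 = 0x31 gives dst = {nds.hi, src.hi}.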
3743 
3744 void Assembler::vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3745   assert(VM_Version::supports_avx(), "");
3746   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
3747   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3748   emit_int8(0x04);
3749   emit_int8(0xC0 | encode);
3750   emit_int8(imm8);
3751 }
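// The imm8 of vpermilps permutes the four floats within each 128-bit lane;
// for example, imm8 = 0x1B (0b00011011) reverses them.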
3752 
3753 void Assembler::vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3754   assert(VM_Version::supports_avx2(), "");
3755   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
3756   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3757   emit_int8(0x01);
3758   emit_int8(0xC0 | encode);
3759   emit_int8(imm8);
3760 }
3761 
3762 void Assembler::pause() {
3763   emit_int8((unsigned char)0xF3);
3764   emit_int8((unsigned char)0x90);
3765 }
3766 
3767 void Assembler::ud2() {
3768   emit_int8(0x0F);
3769   emit_int8(0x0B);
3770 }
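// pause encodes as 0xF3 0x90 ("rep nop"), the SSE2 spin-wait hint for spin
// loops; ud2 (0x0F 0x0B) raises an invalid-opcode exception (#UD).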
3771 
3772 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3773   assert(VM_Version::supports_sse4_2(), "");
3774   InstructionMark im(this);
3775   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3776   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3777   emit_int8(0x61);
3778   emit_operand(dst, src);
3779   emit_int8(imm8);
3780 }
3781 
3782 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3783   assert(VM_Version::supports_sse4_2(), "");
3784   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3785   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3786   emit_int8(0x61);
3787   emit_int8((unsigned char)(0xC0 | encode));
3788   emit_int8(imm8);
3789 }
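// pcmpestri compares explicit-length operands: rax/eax holds the length of
// the first operand and rdx/edx the length of the second; the resulting
// index is returned in rcx/ecx and the flags summarize the comparison.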
3790 
3791 // In this context, each element of dst is set to all ones where dst and src compare equal; non-equal elements are zeroed in dst
3792 void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
3793   assert(VM_Version::supports_sse2(), "");
3794   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3795   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3796   emit_int8(0x74);
3797   emit_int8((unsigned char)(0xC0 | encode));
3798 }
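// For example, with dst = {1, 2, 3, ...} and src = {1, 9, 3, ...}, pcmpeqb
// leaves dst = {0xFF, 0x00, 0xFF, ...}.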
3799 
3800 // In this context, each element of dst is set to all ones where dst and src compare equal; non-equal elements are zeroed in dst
3801 void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3802   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
3803   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
3804   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3805   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3806   emit_int8(0x74);
3807   emit_int8((unsigned char)(0xC0 | encode));
3808 }
3809 
3810 // In this context, kdst receives a mask with one bit per element, set where the corresponding elements compare equal
3811 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3812   assert(VM_Version::supports_avx512bw(), "");
3813   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3814   attributes.set_is_evex_instruction();
3815   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3816   emit_int8(0x74);
3817   emit_int8((unsigned char)(0xC0 | encode));
3818 }
3819 
3820 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3821   assert(VM_Version::supports_avx512vlbw(), "");
3822   InstructionMark im(this);
3823   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3824   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3825   attributes.set_is_evex_instruction();
3826   int dst_enc = kdst->encoding();
3827   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3828   emit_int8(0x64);
3829   emit_operand(as_Register(dst_enc), src);
3830 }
3831 
3832 void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3833   assert(is_vector_masking(), "");
3834   assert(VM_Version::supports_avx512vlbw(), "");
3835   InstructionMark im(this);
3836   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3837   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3838   attributes.reset_is_clear_context();
3839   attributes.set_embedded_opmask_register_specifier(mask);
3840   attributes.set_is_evex_instruction();
3841   int dst_enc = kdst->encoding();
3842   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3843   emit_int8(0x64);
3844   emit_operand(as_Register(dst_enc), src);
3845 }
3846 
3847 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3848   assert(VM_Version::supports_avx512vlbw(), "");
3849   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3850   attributes.set_is_evex_instruction();
3851   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3852   emit_int8(0x3E);
3853   emit_int8((unsigned char)(0xC0 | encode));
3854   emit_int8(vcc);
3855 }
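// The trailing imm8 is the architectural comparison predicate: 0 = eq,
// 1 = lt, 2 = le, 3 = false, 4 = neq, 5 = nlt (>=), 6 = nle (>), 7 = true.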
3856 
3857 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3858   assert(is_vector_masking(), "");
3859   assert(VM_Version::supports_avx512vlbw(), "");
3860   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3861   attributes.reset_is_clear_context();
3862   attributes.set_embedded_opmask_register_specifier(mask);
3863   attributes.set_is_evex_instruction();
3864   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3865   emit_int8(0x3E);
3866   emit_int8((unsigned char)(0xC0 | encode));
3867   emit_int8(vcc);
3868 }
3869 
3870 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3871   assert(VM_Version::supports_avx512vlbw(), "");
3872   InstructionMark im(this);
3873   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3874   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3875   attributes.set_is_evex_instruction();
3876   int dst_enc = kdst->encoding();
3877   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3878   emit_int8(0x3E);
3879   emit_operand(as_Register(dst_enc), src);
3880   emit_int8(vcc);
3881 }
3882 
3883 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3884   assert(VM_Version::supports_avx512bw(), "");
3885   InstructionMark im(this);
3886   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3887   attributes.set_is_evex_instruction();
3888   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3889   int dst_enc = kdst->encoding();
3890   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3891   emit_int8(0x74);
3892   emit_operand(as_Register(dst_enc), src);
3893 }
3894 
3895 void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3896   assert(VM_Version::supports_avx512vlbw(), "");
3897   assert(is_vector_masking(), "");    // For stub code use only
3898   InstructionMark im(this);
3899   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3900   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3901   attributes.reset_is_clear_context();
3902   attributes.set_embedded_opmask_register_specifier(mask);
3903   attributes.set_is_evex_instruction();
3904   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3905   emit_int8(0x74);
3906   emit_operand(as_Register(kdst->encoding()), src);
3907 }
3908 
3909 // In this context, each element of dst is set to all ones where dst and src compare equal; non-equal elements are zeroed in dst
3910 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3911   assert(VM_Version::supports_sse2(), "");
3912   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3913   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3914   emit_int8(0x75);
3915   emit_int8((unsigned char)(0xC0 | encode));
3916 }
3917 
3918 // In this context, each element of dst is set to all ones where dst and src compare equal; non-equal elements are zeroed in dst
3919 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3920   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
3921   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
3922   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3923   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3924   emit_int8(0x75);
3925   emit_int8((unsigned char)(0xC0 | encode));
3926 }
3927 
3928 // In this context, kdst receives a mask with one bit per element, set where the corresponding elements compare equal
3929 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3930   assert(VM_Version::supports_avx512bw(), "");
3931   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3932   attributes.set_is_evex_instruction();
3933   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3934   emit_int8(0x75);
3935   emit_int8((unsigned char)(0xC0 | encode));
3936 }
3937 
3938 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3939   assert(VM_Version::supports_avx512bw(), "");
3940   InstructionMark im(this);
3941   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3942   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3943   attributes.set_is_evex_instruction();
3944   int dst_enc = kdst->encoding();
3945   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3946   emit_int8(0x75);
3947   emit_operand(as_Register(dst_enc), src);
3948 }
3949 
3950 // In this context, each element of dst is set to all ones where dst and src compare equal; non-equal elements are zeroed in dst
3951 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3952   assert(VM_Version::supports_sse2(), "");
3953   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3954   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3955   emit_int8(0x76);
3956   emit_int8((unsigned char)(0xC0 | encode));
3957 }
3958 
3959 // The dst vector is written with all-ones in each component that compares equal; non-equal components are zeroed.
3960 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3961   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
3962   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
3963   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3964   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3965   emit_int8((unsigned char)0x76);
3966   emit_int8((unsigned char)(0xC0 | encode));
3967 }
3968 
3969 // kdst is written with the mask of components that compare equal.
3970 void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) {
3971   assert(VM_Version::supports_evex(), "");
3972   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3973   attributes.set_is_evex_instruction();
3974   attributes.reset_is_clear_context();
3975   attributes.set_embedded_opmask_register_specifier(mask);
3976   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3977   emit_int8(0x76);
3978   emit_int8((unsigned char)(0xC0 | encode));
3979 }
3980 
3981 void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3982   assert(VM_Version::supports_evex(), "");
3983   InstructionMark im(this);
3984   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3985   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3986   attributes.set_is_evex_instruction();
3987   attributes.reset_is_clear_context();
3988   attributes.set_embedded_opmask_register_specifier(mask);
3989   int dst_enc = kdst->encoding();
3990   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3991   emit_int8(0x76);
3992   emit_operand(as_Register(dst_enc), src);
3993 }
3994 
3995 // The dst vector is written with all-ones in each component that compares equal; non-equal components are zeroed.
3996 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3997   assert(VM_Version::supports_sse4_1(), "");
3998   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3999   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4000   emit_int8(0x29);
4001   emit_int8((unsigned char)(0xC0 | encode));
4002 }
4003 
4004 // The dst vector is written with all-ones in each component that compares equal; non-equal components are zeroed.
4005 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4006   assert(VM_Version::supports_avx(), "");
4007   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4008   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4009   emit_int8(0x29);
4010   emit_int8((unsigned char)(0xC0 | encode));
4011 }
4012 
4013 // kdst is written with the mask of components that compare equal.
4014 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
4015   assert(VM_Version::supports_evex(), "");
4016   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4017   attributes.reset_is_clear_context();
4018   attributes.set_is_evex_instruction();
4019   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4020   emit_int8(0x29);
4021   emit_int8((unsigned char)(0xC0 | encode));
4022 }
4023 
4024 // kdst is written with the mask of components that compare equal.
4025 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
4026   assert(VM_Version::supports_evex(), "");
4027   InstructionMark im(this);
4028   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4029   attributes.reset_is_clear_context();
4030   attributes.set_is_evex_instruction();
4031   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
4032   int dst_enc = kdst->encoding();
4033   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4034   emit_int8(0x29);
4035   emit_operand(as_Register(dst_enc), src);
4036 }
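
     // The set_address_attributes() calls in the Address forms feed the tuple
     // table used for EVEX compressed displacements (disp8*N): with a
     // full-vector (FV) tuple the one-byte displacement is scaled by the
     // operand width, e.g. disp8 == 1 addresses offset 64 for a 512-bit
     // memory operand.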
4037 
4038 void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) {
4039   assert(VM_Version::supports_sse4_1(), "");
4040   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4041   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4042   emit_int8(0x37);
4043   emit_int8((unsigned char)(0xC0 | encode));
4044 }
4045 
4046 void Assembler::pmovmskb(Register dst, XMMRegister src) {
4047   assert(VM_Version::supports_sse2(), "");
4048   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4049   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4050   emit_int8((unsigned char)0xD7);
4051   emit_int8((unsigned char)(0xC0 | encode));
4052 }
4053 
4054 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
4055   assert(VM_Version::supports_avx2(), "");
4056   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4057   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4058   emit_int8((unsigned char)0xD7);
4059   emit_int8((unsigned char)(0xC0 | encode));
4060 }
4061 
4062 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
4063   assert(VM_Version::supports_sse4_1(), "");
4064   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4065   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4066   emit_int8(0x16);
4067   emit_int8((unsigned char)(0xC0 | encode));
4068   emit_int8(imm8);
4069 }
4070 
4071 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
4072   assert(VM_Version::supports_sse4_1(), "");
4073   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4074   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4075   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4076   emit_int8(0x16);
4077   emit_operand(src, dst);
4078   emit_int8(imm8);
4079 }
4080 
4081 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
4082   assert(VM_Version::supports_sse4_1(), "");
4083   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4084   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4085   emit_int8(0x16);
4086   emit_int8((unsigned char)(0xC0 | encode));
4087   emit_int8(imm8);
4088 }
4089 
4090 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
4091   assert(VM_Version::supports_sse4_1(), "");
4092   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4093   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4094   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4095   emit_int8(0x16);
4096   emit_operand(src, dst);
4097   emit_int8(imm8);
4098 }
4099 
4100 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
4101   assert(VM_Version::supports_sse2(), "");
4102   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4103   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4104   emit_int8((unsigned char)0xC5);
4105   emit_int8((unsigned char)(0xC0 | encode));
4106   emit_int8(imm8);
4107 }
4108 
4109 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
4110   assert(VM_Version::supports_sse4_1(), "");
4111   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4112   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
4113   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4114   emit_int8((unsigned char)0x15);
4115   emit_operand(src, dst);
4116   emit_int8(imm8);
4117 }
4118 
4119 void Assembler::pextrb(Register dst, XMMRegister src, int imm8) {
4120   assert(VM_Version::supports_sse4_1(), "");
4121   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4122   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4123   emit_int8(0x14);
4124   emit_int8((unsigned char)(0xC0 | encode));
4125   emit_int8(imm8);
4126 }
4127 
4128 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
4129   assert(VM_Version::supports_sse4_1(), "");
4130   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4131   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
4132   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4133   emit_int8(0x14);
4134   emit_operand(src, dst);
4135   emit_int8(imm8);
4136 }
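
     // For reference, the extract family shares the 66 0F 3A escape:
     //   14 /r ib = pextrb,  15 /r ib = pextrw (GPR/memory form),
     //   16 /r ib = pextrd (pextrq when VEX.W/REX.W is set).
     // The register forms above pass the XMM source first so that it lands
     // in ModRM.reg with the general register in ModRM.rm.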
4137 
4138 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
4139   assert(VM_Version::supports_sse4_1(), "");
4140   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4141   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4142   emit_int8(0x22);
4143   emit_int8((unsigned char)(0xC0 | encode));
4144   emit_int8(imm8);
4145 }
4146 
4147 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
4148   assert(VM_Version::supports_sse4_1(), "");
4149   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4150   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4151   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4152   emit_int8(0x22);
4153   emit_operand(dst,src);
4154   emit_int8(imm8);
4155 }
4156 
4157 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
4158   assert(VM_Version::supports_sse4_1(), "");
4159   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4160   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4161   emit_int8(0x22);
4162   emit_int8((unsigned char)(0xC0 | encode));
4163   emit_int8(imm8);
4164 }
4165 
4166 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
4167   assert(VM_Version::supports_sse4_1(), "");
4168   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4169   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4170   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4171   emit_int8(0x22);
4172   emit_operand(dst, src);
4173   emit_int8(imm8);
4174 }
4175 
4176 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
4177   assert(VM_Version::supports_sse2(), "");
4178   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4179   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4180   emit_int8((unsigned char)0xC4);
4181   emit_int8((unsigned char)(0xC0 | encode));
4182   emit_int8(imm8);
4183 }
4184 
4185 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
4186   assert(VM_Version::supports_sse2(), "");
4187   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4188   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
4189   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4190   emit_int8((unsigned char)0xC4);
4191   emit_operand(dst, src);
4192   emit_int8(imm8);
4193 }
4194 
4195 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
4196   assert(VM_Version::supports_sse4_1(), "");
4197   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4198   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
4199   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4200   emit_int8(0x20);
4201   emit_operand(dst, src);
4202   emit_int8(imm8);
4203 }
4204 
4205 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
4206   assert(VM_Version::supports_sse4_1(), "");
4207   InstructionMark im(this);
4208   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4209   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4210   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4211   emit_int8(0x30);
4212   emit_operand(dst, src);
4213 }
4214 
4215 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
4216   assert(VM_Version::supports_sse4_1(), "");
4217   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4218   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4219   emit_int8(0x30);
4220   emit_int8((unsigned char)(0xC0 | encode));
4221 }
4222 
4223 void Assembler::pmovzxdq(XMMRegister dst, XMMRegister src) {
4224   assert(VM_Version::supports_sse4_1(), "");
4225   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4226   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4227   emit_int8(0x35);
4228   emit_int8((unsigned char)(0xC0 | encode));
4229 }
4230 
4231 void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) {
4232   assert(VM_Version::supports_sse4_1(), "");
4233   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4234   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4235   emit_int8(0x20);
4236   emit_int8((unsigned char)(0xC0 | encode));
4237 }
4238 
4239 void Assembler::pmovsxbd(XMMRegister dst, XMMRegister src) {
4240   assert(VM_Version::supports_sse4_1(), "");
4241   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4242   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4243   emit_int8(0x21);
4244   emit_int8((unsigned char)(0xC0 | encode));
4245 }
4246 
4247 void Assembler::pmovsxbq(XMMRegister dst, XMMRegister src) {
4248   assert(VM_Version::supports_sse4_1(), "");
4249   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4250   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4251   emit_int8(0x22);
4252   emit_int8((unsigned char)(0xC0 | encode));
4253 }
4254 
4255 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
4256   assert(VM_Version::supports_avx(), "");
4257   InstructionMark im(this);
4258   assert(dst != xnoreg, "sanity");
4259   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4260   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4261   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4262   emit_int8(0x30);
4263   emit_operand(dst, src);
4264 }
4265 
4266 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
4267   assert(is_vector_masking(), "");
4268   assert(VM_Version::supports_avx512vlbw(), "");
4269   assert(dst != xnoreg, "sanity");
4270   InstructionMark im(this);
4271   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4272   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4273   attributes.set_embedded_opmask_register_specifier(mask);
4274   attributes.set_is_evex_instruction();
4275   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4276   emit_int8(0x30);
4277   emit_operand(dst, src);
4278 }
4279 
4280 void Assembler::vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len) {
4281   assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
4282   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4283   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4284   emit_int8(0x35);
4285   emit_int8((unsigned char)(0xC0 | encode));
4286 }
4287 
4288 void Assembler::vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len) {
4289   assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
4290   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4291   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4292   emit_int8(0x31);
4293   emit_int8((unsigned char)(0xC0 | encode));
4294 }
4295 
4296 void Assembler::vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len) {
4297   assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
4298   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4299   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4300   emit_int8(0x32);
4301   emit_int8((unsigned char)(0xC0 | encode));
4302 }
4303 
4304 void Assembler::vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len) {
4305   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4306          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4307              VM_Version::supports_evex(), "");
4308   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4309   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4310   emit_int8(0x21);
4311   emit_int8((unsigned char)(0xC0 | encode));
4312 }
4313 
4314 void Assembler::vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len) {
4315   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4316          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4317              VM_Version::supports_evex(), "");
4318   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4319   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4320   emit_int8(0x22);
4321   emit_int8((unsigned char)(0xC0 | encode));
4322 }
4323 
4324 void Assembler::vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len) {
4325   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4326          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4327              VM_Version::supports_evex(), "");
4328   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4329   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4330   emit_int8(0x20);
4331   emit_int8((unsigned char)(0xC0 | encode));
4332 }
4333 
4334 void Assembler::vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len) {
4335   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4336          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4337              VM_Version::supports_evex(), "");
4338   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4339   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4340   emit_int8(0x23);
4341   emit_int8((unsigned char)(0xC0 | encode));
4342 }
4343 
4344 void Assembler::vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len) {
4345   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4346          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4347              VM_Version::supports_evex(), "");
4348   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4349   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4350   emit_int8(0x24);
4351   emit_int8((unsigned char)(0xC0 | encode));
4352 }
4353 
4354 void Assembler::vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len) {
4355   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4356          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4357              VM_Version::supports_evex(), "");
4358   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4359   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4360   emit_int8(0x25);
4361   emit_int8((unsigned char)(0xC0 | encode));
4362 }
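
     // Extension opcode map under the 0F 38 escape, for reference:
     //   sign-extend: 20 bw, 21 bd, 22 bq, 23 wd, 24 wq, 25 dq
     //   zero-extend: 30 bw, 31 bd, 32 bq, 35 dq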
4363 
4364 void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
4365   assert(VM_Version::supports_avx512vlbw(), "");
4366   assert(src != xnoreg, "sanity");
4367   InstructionMark im(this);
4368   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4369   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4370   attributes.set_is_evex_instruction();
4371   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
4372   emit_int8(0x30);
4373   emit_operand(src, dst);
4374 }
4375 
4376 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
4377   assert(is_vector_masking(), "");
4378   assert(VM_Version::supports_avx512vlbw(), "");
4379   assert(src != xnoreg, "sanity");
4380   InstructionMark im(this);
4381   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4382   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4383   attributes.reset_is_clear_context();
4384   attributes.set_embedded_opmask_register_specifier(mask);
4385   attributes.set_is_evex_instruction();
4386   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
4387   emit_int8(0x30);
4388   emit_operand(src, dst);
4389 }
4390 
4391 // generic
4392 void Assembler::pop(Register dst) {
4393   int encode = prefix_and_encode(dst->encoding());
4394   emit_int8(0x58 | encode);
4395 }
4396 
4397 void Assembler::popcntl(Register dst, Address src) {
4398   assert(VM_Version::supports_popcnt(), "must support");
4399   InstructionMark im(this);
4400   emit_int8((unsigned char)0xF3);
4401   prefix(src, dst);
4402   emit_int8(0x0F);
4403   emit_int8((unsigned char)0xB8);
4404   emit_operand(dst, src);
4405 }
4406 
4407 void Assembler::popcntl(Register dst, Register src) {
4408   assert(VM_Version::supports_popcnt(), "must support");
4409   emit_int8((unsigned char)0xF3);
4410   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4411   emit_int8(0x0F);
4412   emit_int8((unsigned char)0xB8);
4413   emit_int8((unsigned char)(0xC0 | encode));
4414 }
4415 
4416 void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
4417   assert(VM_Version::supports_vpopcntdq(), "must support vpopcntdq feature");
4418   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4419   attributes.set_is_evex_instruction();
4420   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4421   emit_int8(0x55);
4422   emit_int8((unsigned char)(0xC0 | encode));
4423 }
4424 
4425 void Assembler::popf() {
4426   emit_int8((unsigned char)0x9D);
4427 }
4428 
4429 #ifndef _LP64 // no 32bit push/pop on amd64
4430 void Assembler::popl(Address dst) {
4431   // 32-bit only: on 64-bit this encoding would adjust the stack by 8 bytes (hence the guard above)
4432   InstructionMark im(this);
4433   prefix(dst);
4434   emit_int8((unsigned char)0x8F);
4435   emit_operand(rax, dst);
4436 }
4437 #endif
4438 
4439 void Assembler::prefetch_prefix(Address src) {
4440   prefix(src);
4441   emit_int8(0x0F);
4442 }
4443 
4444 void Assembler::prefetchnta(Address src) {
4445   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4446   InstructionMark im(this);
4447   prefetch_prefix(src);
4448   emit_int8(0x18);
4449   emit_operand(rax, src); // 0, src
4450 }
4451 
4452 void Assembler::prefetchr(Address src) {
4453   assert(VM_Version::supports_3dnow_prefetch(), "must support");
4454   InstructionMark im(this);
4455   prefetch_prefix(src);
4456   emit_int8(0x0D);
4457   emit_operand(rax, src); // 0, src
4458 }
4459 
4460 void Assembler::prefetcht0(Address src) {
4461   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4462   InstructionMark im(this);
4463   prefetch_prefix(src);
4464   emit_int8(0x18);
4465   emit_operand(rcx, src); // 1, src
4466 }
4467 
4468 void Assembler::prefetcht1(Address src) {
4469   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4470   InstructionMark im(this);
4471   prefetch_prefix(src);
4472   emit_int8(0x18);
4473   emit_operand(rdx, src); // 2, src
4474 }
4475 
4476 void Assembler::prefetcht2(Address src) {
4477   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4478   InstructionMark im(this);
4479   prefetch_prefix(src);
4480   emit_int8(0x18);
4481   emit_operand(rbx, src); // 3, src
4482 }
4483 
4484 void Assembler::prefetchw(Address src) {
4485   assert(VM_Version::supports_3dnow_prefetch(), "must support");
4486   InstructionMark im(this);
4487   prefetch_prefix(src);
4488   emit_int8(0x0D);
4489   emit_operand(rcx, src); // 1, src
4490 }
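
     // Prefetch encoding summary: the fixed general register passed to
     // emit_operand() above only selects the ModRM reg digit.
     //   0F 18 /0 prefetchnta   0F 18 /1 prefetcht0
     //   0F 18 /2 prefetcht1    0F 18 /3 prefetcht2
     //   0F 0D /0 prefetch(r)   0F 0D /1 prefetchw   (3DNow!/AMD hints)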
4491 
4492 void Assembler::prefix(Prefix p) {
4493   emit_int8(p);
4494 }
4495 
4496 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
4497   assert(VM_Version::supports_ssse3(), "");
4498   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4499   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4500   emit_int8(0x00);
4501   emit_int8((unsigned char)(0xC0 | encode));
4502 }
4503 
4504 void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4505   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4506          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4507          0, "");
4508   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4509   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4510   emit_int8(0x00);
4511   emit_int8((unsigned char)(0xC0 | encode));
4512 }
4513 
4514 void Assembler::pshufb(XMMRegister dst, Address src) {
4515   assert(VM_Version::supports_ssse3(), "");
4516   InstructionMark im(this);
4517   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4518   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4519   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4520   emit_int8(0x00);
4521   emit_operand(dst, src);
4522 }
4523 
4524 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
4525   assert(isByte(mode), "invalid value");
4526   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4527   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
4528   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4529   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4530   emit_int8(0x70);
4531   emit_int8((unsigned char)(0xC0 | encode));
4532   emit_int8(mode & 0xFF);
4533 }
4534 
4535 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
4536   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4537          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4538          0, "");
4539   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4540   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4541   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4542   emit_int8(0x70);
4543   emit_int8((unsigned char)(0xC0 | encode));
4544   emit_int8(mode & 0xFF);
4545 }
4546 
4547 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
4548   assert(isByte(mode), "invalid value");
4549   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4550   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4551   InstructionMark im(this);
4552   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4553   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4554   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4555   emit_int8(0x70);
4556   emit_operand(dst, src);
4557   emit_int8(mode & 0xFF);
4558 }
4559 
4560 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
4561   assert(isByte(mode), "invalid value");
4562   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4563   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4564   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4565   emit_int8(0x70);
4566   emit_int8((unsigned char)(0xC0 | encode));
4567   emit_int8(mode & 0xFF);
4568 }
4569 
4570 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
4571   assert(isByte(mode), "invalid value");
4572   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4573   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4574   InstructionMark im(this);
4575   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4576   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4577   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4578   emit_int8(0x70);
4579   emit_operand(dst, src);
4580   emit_int8(mode & 0xFF);
4581 }
4582 
4583 void Assembler::psrldq(XMMRegister dst, int shift) {
4584   // Shift right the 128-bit value in dst XMMRegister by shift bytes.
4585   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4586   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
       // XMM3 is for /3 encoding: 66 0F 73 /3 ib
4587   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4588   emit_int8(0x73);
4589   emit_int8((unsigned char)(0xC0 | encode));
4590   emit_int8(shift);
4591 }
4592 
4593 void Assembler::pslldq(XMMRegister dst, int shift) {
4594   // Shift left the 128-bit value in dst XMMRegister by shift bytes.
4595   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4596   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4597   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
4598   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4599   emit_int8(0x73);
4600   emit_int8((unsigned char)(0xC0 | encode));
4601   emit_int8(shift);
4602 }
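
     // psrldq/pslldq share opcode 66 0F 73; the ModRM reg digit selects the
     // operation, which is why xmm3 (/3) and xmm7 (/7) stand in as the first
     // operand above. A hypothetical usage sketch:
     //   pslldq(xmm0, 8);   // xmm0 = [ original low 8 bytes | zeros ]
     //   psrldq(xmm0, 8);   // xmm0 = [ zeros | original low 8 bytes ]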
4603 
4604 void Assembler::ptest(XMMRegister dst, Address src) {
4605   assert(VM_Version::supports_sse4_1(), "");
4606   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4607   InstructionMark im(this);
4608   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4609   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4610   emit_int8(0x17);
4611   emit_operand(dst, src);
4612 }
4613 
4614 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
4615   assert(VM_Version::supports_sse4_1(), "");
4616   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4617   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4618   emit_int8(0x17);
4619   emit_int8((unsigned char)(0xC0 | encode));
4620 }
4621 
4622 void Assembler::vptest(XMMRegister dst, Address src) {
4623   assert(VM_Version::supports_avx(), "");
4624   InstructionMark im(this);
4625   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4626   assert(dst != xnoreg, "sanity");
4627   // swap src<->dst for encoding
4628   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4629   emit_int8(0x17);
4630   emit_operand(dst, src);
4631 }
4632 
4633 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
4634   assert(VM_Version::supports_avx(), "");
4635   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4636   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4637   emit_int8(0x17);
4638   emit_int8((unsigned char)(0xC0 | encode));
4639 }
4640 
4641 void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
4642   assert(VM_Version::supports_avx(), "");
4643   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4644   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4645   emit_int8(0x17);
4646   emit_int8((unsigned char)(0xC0 | encode));
4647 }
4648 
4649 void Assembler::punpcklbw(XMMRegister dst, Address src) {
4650   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4651   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4652   InstructionMark im(this);
4653   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4654   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4655   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4656   emit_int8(0x60);
4657   emit_operand(dst, src);
4658 }
4659 
4660 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
4661   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4662   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4663   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4664   emit_int8(0x60);
4665   emit_int8((unsigned char)(0xC0 | encode));
4666 }
4667 
4668 void Assembler::punpckldq(XMMRegister dst, Address src) {
4669   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4670   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4671   InstructionMark im(this);
4672   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4673   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4674   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4675   emit_int8(0x62);
4676   emit_operand(dst, src);
4677 }
4678 
4679 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
4680   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4681   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4682   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4683   emit_int8(0x62);
4684   emit_int8((unsigned char)(0xC0 | encode));
4685 }
4686 
4687 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
4688   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4689   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4690   attributes.set_rex_vex_w_reverted();
4691   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4692   emit_int8(0x6C);
4693   emit_int8((unsigned char)(0xC0 | encode));
4694 }
4695 
4696 void Assembler::push(int32_t imm32) {
4697   // On 64-bit we push a 64-bit (sign-extended) value onto the stack,
4698   // but the immediate itself is only 32 bits.
4699   emit_int8(0x68);
4700   emit_int32(imm32);
4701 }
4702 
4703 void Assembler::push(Register src) {
4704   int encode = prefix_and_encode(src->encoding());
4705 
4706   emit_int8(0x50 | encode);
4707 }
4708 
4709 void Assembler::pushf() {
4710   emit_int8((unsigned char)0x9C);
4711 }
4712 
4713 #ifndef _LP64 // no 32bit push/pop on amd64
4714 void Assembler::pushl(Address src) {
4715   // 32-bit only: on 64-bit this encoding would push 8 bytes (hence the guard above)
4716   InstructionMark im(this);
4717   prefix(src);
4718   emit_int8((unsigned char)0xFF);
4719   emit_operand(rsi, src);
4720 }
4721 #endif
4722 
4723 void Assembler::rcll(Register dst, int imm8) {
4724   assert(isShiftCount(imm8), "illegal shift count");
4725   int encode = prefix_and_encode(dst->encoding());
4726   if (imm8 == 1) {
4727     emit_int8((unsigned char)0xD1);
4728     emit_int8((unsigned char)(0xD0 | encode));
4729   } else {
4730     emit_int8((unsigned char)0xC1);
4731     emit_int8((unsigned char)(0xD0 | encode));
4732     emit_int8(imm8);
4733   }
4734 }
4735 
4736 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
4737   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4738   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4739   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4740   emit_int8(0x53);
4741   emit_int8((unsigned char)(0xC0 | encode));
4742 }
4743 
4744 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
4745   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4746   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4747   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4748   emit_int8(0x53);
4749   emit_int8((unsigned char)(0xC0 | encode));
4750 }
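
     // RCPPS/RCPSS produce only an approximate reciprocal (relative error at
     // most 1.5 * 2^-12 per the Intel SDM); callers needing full single
     // precision typically refine with a Newton-Raphson step,
     // x1 = x0 * (2 - d * x0).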
4751 
4752 void Assembler::rdtsc() {
4753   emit_int8((unsigned char)0x0F);
4754   emit_int8((unsigned char)0x31);
4755 }
4756 
4757 // copies rcx pointer-sized words from [esi] to [edi]
4758 // generic
4759 void Assembler::rep_mov() {
4760   emit_int8((unsigned char)0xF3);
4761   // MOVSQ
4762   LP64_ONLY(prefix(REX_W));
4763   emit_int8((unsigned char)0xA5);
4764 }
4765 
4766 // fills rcx bytes at [edi] with the low byte of rax (al)
4767 void Assembler::rep_stosb() {
4768   emit_int8((unsigned char)0xF3); // REP
4769   LP64_ONLY(prefix(REX_W));
4770   emit_int8((unsigned char)0xAA); // STOSB
4771 }
4772 
4773 // fills rcx pointer-sized words at [edi] with the value in rax
4774 // generic
4775 void Assembler::rep_stos() {
4776   emit_int8((unsigned char)0xF3); // REP
4777   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
4778   emit_int8((unsigned char)0xAB);
4779 }
4780 
4781 // scans rcx pointer-sized words at [edi] for an occurrence of rax
4782 // generic
4783 void Assembler::repne_scan() {
4784   emit_int8((unsigned char)0xF2);
4785   // SCASQ
4786   LP64_ONLY(prefix(REX_W));
4787   emit_int8((unsigned char)0xAF);
4788 }
4789 
4790 #ifdef _LP64
4791 // scans rcx 4-byte words at [edi] for an occurrence of rax
4792 // generic
4793 void Assembler::repne_scanl() {
4794   emit_int8((unsigned char)0xF2);
4795   // SCASL
4796   emit_int8((unsigned char)0xAF);
4797 }
4798 #endif
4799 
4800 void Assembler::ret(int imm16) {
4801   if (imm16 == 0) {
4802     emit_int8((unsigned char)0xC3);
4803   } else {
4804     emit_int8((unsigned char)0xC2);
4805     emit_int16(imm16);
4806   }
4807 }
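
     // For ret() above: imm16 == 0 selects the one-byte near return (0xC3);
     // otherwise 0xC2 iw is emitted, which additionally pops imm16 bytes of
     // arguments from the stack.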
4808 
4809 void Assembler::sahf() {
4810 #ifdef _LP64
4811   // Not supported in 64bit mode
4812   ShouldNotReachHere();
4813 #endif
4814   emit_int8((unsigned char)0x9E);
4815 }
4816 
4817 void Assembler::sarl(Register dst, int imm8) {
4818   assert(isShiftCount(imm8), "illegal shift count");  // check before any prefix byte is emitted
4819   int encode = prefix_and_encode(dst->encoding());
4820   if (imm8 == 1) {
4821     emit_int8((unsigned char)0xD1);
4822     emit_int8((unsigned char)(0xF8 | encode));
4823   } else {
4824     emit_int8((unsigned char)0xC1);
4825     emit_int8((unsigned char)(0xF8 | encode));
4826     emit_int8(imm8);
4827   }
4828 }
4829 
4830 void Assembler::sarl(Register dst) {
4831   int encode = prefix_and_encode(dst->encoding());
4832   emit_int8((unsigned char)0xD3);
4833   emit_int8((unsigned char)(0xF8 | encode));
4834 }
4835 
4836 void Assembler::sbbl(Address dst, int32_t imm32) {
4837   InstructionMark im(this);
4838   prefix(dst);
4839   emit_arith_operand(0x81, rbx, dst, imm32);
4840 }
4841 
4842 void Assembler::sbbl(Register dst, int32_t imm32) {
4843   prefix(dst);
4844   emit_arith(0x81, 0xD8, dst, imm32);
4845 }
4846 
4847 
4848 void Assembler::sbbl(Register dst, Address src) {
4849   InstructionMark im(this);
4850   prefix(src, dst);
4851   emit_int8(0x1B);
4852   emit_operand(dst, src);
4853 }
4854 
4855 void Assembler::sbbl(Register dst, Register src) {
4856   (void) prefix_and_encode(dst->encoding(), src->encoding());
4857   emit_arith(0x1B, 0xC0, dst, src);
4858 }
4859 
4860 void Assembler::setb(Condition cc, Register dst) {
4861   assert(0 <= cc && cc < 16, "illegal cc");
4862   int encode = prefix_and_encode(dst->encoding(), true);
4863   emit_int8(0x0F);
4864   emit_int8((unsigned char)(0x90 | cc));
4865   emit_int8((unsigned char)(0xC0 | encode));
4866 }
4867 
4868 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
4869   assert(VM_Version::supports_ssse3(), "");
4870   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
4871   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4872   emit_int8((unsigned char)0x0F);
4873   emit_int8((unsigned char)(0xC0 | encode));
4874   emit_int8(imm8);
4875 }
4876 
4877 void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4878   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4879          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4880          0, "");
4881   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
4882   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4883   emit_int8((unsigned char)0x0F);
4884   emit_int8((unsigned char)(0xC0 | encode));
4885   emit_int8(imm8);
4886 }
4887 
4888 void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
4889   assert(VM_Version::supports_sse4_1(), "");
4890   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4891   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4892   emit_int8((unsigned char)0x0E);
4893   emit_int8((unsigned char)(0xC0 | encode));
4894   emit_int8(imm8);
4895 }
4896 
4897 void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
4898   assert(VM_Version::supports_sha(), "");
4899   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
4900   emit_int8((unsigned char)0xCC);
4901   emit_int8((unsigned char)(0xC0 | encode));
4902   emit_int8((unsigned char)imm8);
4903 }
4904 
4905 void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
4906   assert(VM_Version::supports_sha(), "");
4907   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4908   emit_int8((unsigned char)0xC8);
4909   emit_int8((unsigned char)(0xC0 | encode));
4910 }
4911 
4912 void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
4913   assert(VM_Version::supports_sha(), "");
4914   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4915   emit_int8((unsigned char)0xC9);
4916   emit_int8((unsigned char)(0xC0 | encode));
4917 }
4918 
4919 void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
4920   assert(VM_Version::supports_sha(), "");
4921   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4922   emit_int8((unsigned char)0xCA);
4923   emit_int8((unsigned char)(0xC0 | encode));
4924 }
4925 
4926 // xmm0 is an implicit additional source for this instruction.
4927 void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
4928   assert(VM_Version::supports_sha(), "");
4929   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4930   emit_int8((unsigned char)0xCB);
4931   emit_int8((unsigned char)(0xC0 | encode));
4932 }
4933 
4934 void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
4935   assert(VM_Version::supports_sha(), "");
4936   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4937   emit_int8((unsigned char)0xCC);
4938   emit_int8((unsigned char)(0xC0 | encode));
4939 }
4940 
4941 void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
4942   assert(VM_Version::supports_sha(), "");
4943   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4944   emit_int8((unsigned char)0xCD);
4945   emit_int8((unsigned char)(0xC0 | encode));
4946 }
4947 
4948 
4949 void Assembler::shll(Register dst, int imm8) {
4950   assert(isShiftCount(imm8), "illegal shift count");
4951   int encode = prefix_and_encode(dst->encoding());
4952   if (imm8 == 1) {
4953     emit_int8((unsigned char)0xD1);
4954     emit_int8((unsigned char)(0xE0 | encode));
4955   } else {
4956     emit_int8((unsigned char)0xC1);
4957     emit_int8((unsigned char)(0xE0 | encode));
4958     emit_int8(imm8);
4959   }
4960 }
4961 
4962 void Assembler::shll(Register dst) {
4963   int encode = prefix_and_encode(dst->encoding());
4964   emit_int8((unsigned char)0xD3);
4965   emit_int8((unsigned char)(0xE0 | encode));
4966 }
4967 
4968 void Assembler::shrl(Register dst, int imm8) {
4969   assert(isShiftCount(imm8), "illegal shift count");
4970   int encode = prefix_and_encode(dst->encoding());
4971   emit_int8((unsigned char)0xC1);
4972   emit_int8((unsigned char)(0xE8 | encode));
4973   emit_int8(imm8);
4974 }
4975 
4976 void Assembler::shrl(Register dst) {
4977   int encode = prefix_and_encode(dst->encoding());
4978   emit_int8((unsigned char)0xD3);
4979   emit_int8((unsigned char)(0xE8 | encode));
4980 }
4981 
4982 // copies a single doubleword from [esi] to [edi] (dword string move)
4983 void Assembler::smovl() {
4984   emit_int8((unsigned char)0xA5);
4985 }
4986 
4987 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
4988   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4989   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4990   attributes.set_rex_vex_w_reverted();
4991   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4992   emit_int8(0x51);
4993   emit_int8((unsigned char)(0xC0 | encode));
4994 }
4995 
4996 void Assembler::sqrtsd(XMMRegister dst, Address src) {
4997   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4998   InstructionMark im(this);
4999   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5000   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5001   attributes.set_rex_vex_w_reverted();
5002   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5003   emit_int8(0x51);
5004   emit_operand(dst, src);
5005 }
5006 
5007 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
5008   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5009   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5010   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5011   emit_int8(0x51);
5012   emit_int8((unsigned char)(0xC0 | encode));
5013 }
5014 
     // Set the direction flag (DF = 1): string operations decrement.
5015 void Assembler::std() {
5016   emit_int8((unsigned char)0xFD);
5017 }
5018 
5019 void Assembler::sqrtss(XMMRegister dst, Address src) {
5020   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5021   InstructionMark im(this);
5022   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5023   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5024   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5025   emit_int8(0x51);
5026   emit_operand(dst, src);
5027 }
5028 
5029 void Assembler::stmxcsr(Address dst) {
5030   if (UseAVX > 0) {
5031     assert(VM_Version::supports_avx(), "");
5032     InstructionMark im(this);
5033     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5034     vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5035     emit_int8((unsigned char)0xAE);
5036     emit_operand(as_Register(3), dst);
5037   } else {
5038     NOT_LP64(assert(VM_Version::supports_sse(), ""));
5039     InstructionMark im(this);
5040     prefix(dst);
5041     emit_int8(0x0F);
5042     emit_int8((unsigned char)0xAE);
5043     emit_operand(as_Register(3), dst);
5044   }
5045 }
5046 
5047 void Assembler::subl(Address dst, int32_t imm32) {
5048   InstructionMark im(this);
5049   prefix(dst);
5050   emit_arith_operand(0x81, rbp, dst, imm32);
5051 }
5052 
5053 void Assembler::subl(Address dst, Register src) {
5054   InstructionMark im(this);
5055   prefix(dst, src);
5056   emit_int8(0x29);
5057   emit_operand(src, dst);
5058 }
5059 
5060 void Assembler::subl(Register dst, int32_t imm32) {
5061   prefix(dst);
5062   emit_arith(0x81, 0xE8, dst, imm32);
5063 }
5064 
5065 // Force a 4-byte immediate even if the value fits in 8 bits.
5066 void Assembler::subl_imm32(Register dst, int32_t imm32) {
5067   prefix(dst);
5068   emit_arith_imm32(0x81, 0xE8, dst, imm32);
5069 }
5070 
5071 void Assembler::subl(Register dst, Address src) {
5072   InstructionMark im(this);
5073   prefix(src, dst);
5074   emit_int8(0x2B);
5075   emit_operand(dst, src);
5076 }
5077 
5078 void Assembler::subl(Register dst, Register src) {
5079   (void) prefix_and_encode(dst->encoding(), src->encoding());
5080   emit_arith(0x2B, 0xC0, dst, src);
5081 }
5082 
5083 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
5084   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5085   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5086   attributes.set_rex_vex_w_reverted();
5087   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5088   emit_int8(0x5C);
5089   emit_int8((unsigned char)(0xC0 | encode));
5090 }
5091 
5092 void Assembler::subsd(XMMRegister dst, Address src) {
5093   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5094   InstructionMark im(this);
5095   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5096   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5097   attributes.set_rex_vex_w_reverted();
5098   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5099   emit_int8(0x5C);
5100   emit_operand(dst, src);
5101 }
5102 
5103 void Assembler::subss(XMMRegister dst, XMMRegister src) {
5104   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5105   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ false);
5106   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5107   emit_int8(0x5C);
5108   emit_int8((unsigned char)(0xC0 | encode));
5109 }
5110 
5111 void Assembler::subss(XMMRegister dst, Address src) {
5112   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5113   InstructionMark im(this);
5114   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5115   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5116   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5117   emit_int8(0x5C);
5118   emit_operand(dst, src);
5119 }
5120 
5121 void Assembler::testb(Register dst, int imm8) {
5122   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
5123   (void) prefix_and_encode(dst->encoding(), true);
5124   emit_arith_b(0xF6, 0xC0, dst, imm8);
5125 }
5126 
5127 void Assembler::testb(Address dst, int imm8) {
5128   InstructionMark im(this);
5129   prefix(dst);
5130   emit_int8((unsigned char)0xF6);
5131   emit_operand(rax, dst, 1);
5132   emit_int8(imm8);
5133 }
5134 
5135 void Assembler::testl(Register dst, int32_t imm32) {
  // Not using emit_arith because test
  // doesn't support sign-extension of 8-bit operands.
5139   int encode = dst->encoding();
5140   if (encode == 0) {
5141     emit_int8((unsigned char)0xA9);
5142   } else {
5143     encode = prefix_and_encode(encode);
5144     emit_int8((unsigned char)0xF7);
5145     emit_int8((unsigned char)(0xC0 | encode));
5146   }
5147   emit_int32(imm32);
5148 }
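
// Illustrative encodings: testl(rax, imm32) takes the short form A9 id,
// while testl(rcx, imm32) is F7 C1 id.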
5149 
5150 void Assembler::testl(Register dst, Register src) {
5151   (void) prefix_and_encode(dst->encoding(), src->encoding());
5152   emit_arith(0x85, 0xC0, dst, src);
5153 }
5154 
5155 void Assembler::testl(Register dst, Address src) {
5156   InstructionMark im(this);
5157   prefix(src, dst);
5158   emit_int8((unsigned char)0x85);
5159   emit_operand(dst, src);
5160 }
5161 
5162 void Assembler::tzcntl(Register dst, Register src) {
5163   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
5164   emit_int8((unsigned char)0xF3);
5165   int encode = prefix_and_encode(dst->encoding(), src->encoding());
5166   emit_int8(0x0F);
5167   emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
5169 }
5170 
5171 void Assembler::tzcntq(Register dst, Register src) {
5172   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
5173   emit_int8((unsigned char)0xF3);
5174   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5175   emit_int8(0x0F);
5176   emit_int8((unsigned char)0xBC);
5177   emit_int8((unsigned char)(0xC0 | encode));
5178 }
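
// tzcnt is the F3-prefixed encoding of 0F BC; on CPUs without BMI1 the
// same byte sequence decodes as bsf, hence the asserts above.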
5179 
5180 void Assembler::ucomisd(XMMRegister dst, Address src) {
5181   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5182   InstructionMark im(this);
5183   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5184   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5185   attributes.set_rex_vex_w_reverted();
5186   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5187   emit_int8(0x2E);
5188   emit_operand(dst, src);
5189 }
5190 
5191 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
5192   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5193   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5194   attributes.set_rex_vex_w_reverted();
5195   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5196   emit_int8(0x2E);
5197   emit_int8((unsigned char)(0xC0 | encode));
5198 }
5199 
5200 void Assembler::ucomiss(XMMRegister dst, Address src) {
5201   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5202   InstructionMark im(this);
5203   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5204   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5205   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5206   emit_int8(0x2E);
5207   emit_operand(dst, src);
5208 }
5209 
5210 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
5211   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5212   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5213   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5214   emit_int8(0x2E);
5215   emit_int8((unsigned char)(0xC0 | encode));
5216 }
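
// Both ucomis* forms set EFLAGS directly: an unordered result (a NaN
// operand) yields ZF = PF = CF = 1, while OF, SF and AF are cleared.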
5217 
5218 void Assembler::xabort(int8_t imm8) {
5219   emit_int8((unsigned char)0xC6);
5220   emit_int8((unsigned char)0xF8);
5221   emit_int8((unsigned char)(imm8 & 0xFF));
5222 }
5223 
5224 void Assembler::xaddb(Address dst, Register src) {
5225   InstructionMark im(this);
5226   prefix(dst, src, true);
5227   emit_int8(0x0F);
5228   emit_int8((unsigned char)0xC0);
5229   emit_operand(src, dst);
5230 }
5231 
5232 void Assembler::xaddw(Address dst, Register src) {
5233   InstructionMark im(this);
5234   emit_int8(0x66);
5235   prefix(dst, src);
5236   emit_int8(0x0F);
5237   emit_int8((unsigned char)0xC1);
5238   emit_operand(src, dst);
5239 }
5240 
5241 void Assembler::xaddl(Address dst, Register src) {
5242   InstructionMark im(this);
5243   prefix(dst, src);
5244   emit_int8(0x0F);
5245   emit_int8((unsigned char)0xC1);
5246   emit_operand(src, dst);
5247 }
5248 
5249 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
5250   InstructionMark im(this);
5251   relocate(rtype);
5252   if (abort.is_bound()) {
5253     address entry = target(abort);
5254     assert(entry != NULL, "abort entry NULL");
5255     intptr_t offset = entry - pc();
5256     emit_int8((unsigned char)0xC7);
5257     emit_int8((unsigned char)0xF8);
    emit_int32(offset - 6); // rel32 is taken from the end of this 6-byte instruction: 2 opcode bytes + 4 displacement bytes
5259   } else {
5260     abort.add_patch_at(code(), locator());
5261     emit_int8((unsigned char)0xC7);
5262     emit_int8((unsigned char)0xF8);
5263     emit_int32(0);
5264   }
5265 }
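
// Worked example (hypothetical layout): if the bound abort handler lies
// 0x20 bytes ahead of the start of xbegin, offset is 0x20 and the emitted
// rel32 is 0x1A, because the branch target is computed from the end of
// this 6-byte instruction.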
5266 
5267 void Assembler::xchgb(Register dst, Address src) { // xchg
5268   InstructionMark im(this);
5269   prefix(src, dst, true);
5270   emit_int8((unsigned char)0x86);
5271   emit_operand(dst, src);
5272 }
5273 
5274 void Assembler::xchgw(Register dst, Address src) { // xchg
5275   InstructionMark im(this);
5276   emit_int8(0x66);
5277   prefix(src, dst);
5278   emit_int8((unsigned char)0x87);
5279   emit_operand(dst, src);
5280 }
5281 
5282 void Assembler::xchgl(Register dst, Address src) { // xchg
5283   InstructionMark im(this);
5284   prefix(src, dst);
5285   emit_int8((unsigned char)0x87);
5286   emit_operand(dst, src);
5287 }
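
// Note: the memory forms of xchg above are implicitly locking on x86;
// no LOCK prefix is needed for atomicity.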
5288 
5289 void Assembler::xchgl(Register dst, Register src) {
5290   int encode = prefix_and_encode(dst->encoding(), src->encoding());
5291   emit_int8((unsigned char)0x87);
5292   emit_int8((unsigned char)(0xC0 | encode));
5293 }
5294 
5295 void Assembler::xend() {
5296   emit_int8((unsigned char)0x0F);
5297   emit_int8((unsigned char)0x01);
5298   emit_int8((unsigned char)0xD5);
5299 }
5300 
5301 void Assembler::xgetbv() {
5302   emit_int8(0x0F);
5303   emit_int8(0x01);
5304   emit_int8((unsigned char)0xD0);
5305 }
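
// xgetbv (0F 01 D0) reads the extended control register selected by ECX
// into EDX:EAX; with ECX = 0 it reports the XCR0 feature-enable mask.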
5306 
5307 void Assembler::xorl(Register dst, int32_t imm32) {
5308   prefix(dst);
5309   emit_arith(0x81, 0xF0, dst, imm32);
5310 }
5311 
5312 void Assembler::xorl(Register dst, Address src) {
5313   InstructionMark im(this);
5314   prefix(src, dst);
5315   emit_int8(0x33);
5316   emit_operand(dst, src);
5317 }
5318 
5319 void Assembler::xorl(Register dst, Register src) {
5320   (void) prefix_and_encode(dst->encoding(), src->encoding());
5321   emit_arith(0x33, 0xC0, dst, src);
5322 }
5323 
5324 void Assembler::xorb(Register dst, Address src) {
5325   InstructionMark im(this);
5326   prefix(src, dst);
5327   emit_int8(0x32);
5328   emit_operand(dst, src);
5329 }
5330 
// AVX 3-operand scalar floating-point arithmetic instructions
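//
// Illustrative encoding (a sketch assuming a plain-AVX CPU, where a VEX
// rather than an EVEX prefix is selected): vaddsd(xmm0, xmm1, xmm2)
// assembles to C5 F3 58 C2, with vvvv holding the inverted encoding of
// the first source, xmm1.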
5332 
5333 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
5334   assert(VM_Version::supports_avx(), "");
5335   InstructionMark im(this);
5336   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5337   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5338   attributes.set_rex_vex_w_reverted();
5339   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5340   emit_int8(0x58);
5341   emit_operand(dst, src);
5342 }
5343 
5344 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5345   assert(VM_Version::supports_avx(), "");
5346   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5347   attributes.set_rex_vex_w_reverted();
5348   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5349   emit_int8(0x58);
5350   emit_int8((unsigned char)(0xC0 | encode));
5351 }
5352 
5353 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
5354   assert(VM_Version::supports_avx(), "");
5355   InstructionMark im(this);
5356   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5357   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5358   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5359   emit_int8(0x58);
5360   emit_operand(dst, src);
5361 }
5362 
5363 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5364   assert(VM_Version::supports_avx(), "");
5365   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5366   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5367   emit_int8(0x58);
5368   emit_int8((unsigned char)(0xC0 | encode));
5369 }
5370 
5371 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
5372   assert(VM_Version::supports_avx(), "");
5373   InstructionMark im(this);
5374   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5375   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5376   attributes.set_rex_vex_w_reverted();
5377   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5378   emit_int8(0x5E);
5379   emit_operand(dst, src);
5380 }
5381 
5382 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5383   assert(VM_Version::supports_avx(), "");
5384   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5385   attributes.set_rex_vex_w_reverted();
5386   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5387   emit_int8(0x5E);
5388   emit_int8((unsigned char)(0xC0 | encode));
5389 }
5390 
5391 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
5392   assert(VM_Version::supports_avx(), "");
5393   InstructionMark im(this);
5394   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5395   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5396   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5397   emit_int8(0x5E);
5398   emit_operand(dst, src);
5399 }
5400 
5401 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5402   assert(VM_Version::supports_avx(), "");
5403   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5404   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5405   emit_int8(0x5E);
5406   emit_int8((unsigned char)(0xC0 | encode));
5407 }
5408 
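// In the "231" FMA form the second and third operands are multiplied and
// accumulated into the first: dst = src1 * src2 + dst.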
5409 void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
5410   assert(VM_Version::supports_fma(), "");
5411   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5412   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5413   emit_int8((unsigned char)0xB9);
5414   emit_int8((unsigned char)(0xC0 | encode));
5415 }
5416 
5417 void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
5418   assert(VM_Version::supports_fma(), "");
5419   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5420   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5421   emit_int8((unsigned char)0xB9);
5422   emit_int8((unsigned char)(0xC0 | encode));
5423 }
5424 
5425 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
5426   assert(VM_Version::supports_avx(), "");
5427   InstructionMark im(this);
5428   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5429   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5430   attributes.set_rex_vex_w_reverted();
5431   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5432   emit_int8(0x59);
5433   emit_operand(dst, src);
5434 }
5435 
5436 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5437   assert(VM_Version::supports_avx(), "");
5438   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5439   attributes.set_rex_vex_w_reverted();
5440   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5441   emit_int8(0x59);
5442   emit_int8((unsigned char)(0xC0 | encode));
5443 }
5444 
5445 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
5446   assert(VM_Version::supports_avx(), "");
5447   InstructionMark im(this);
5448   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5449   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5450   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5451   emit_int8(0x59);
5452   emit_operand(dst, src);
5453 }
5454 
5455 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5456   assert(VM_Version::supports_avx(), "");
5457   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5458   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5459   emit_int8(0x59);
5460   emit_int8((unsigned char)(0xC0 | encode));
5461 }
5462 
5463 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
5464   assert(VM_Version::supports_avx(), "");
5465   InstructionMark im(this);
5466   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5467   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5468   attributes.set_rex_vex_w_reverted();
5469   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5470   emit_int8(0x5C);
5471   emit_operand(dst, src);
5472 }
5473 
5474 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5475   assert(VM_Version::supports_avx(), "");
5476   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5477   attributes.set_rex_vex_w_reverted();
5478   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5479   emit_int8(0x5C);
5480   emit_int8((unsigned char)(0xC0 | encode));
5481 }
5482 
5483 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
5484   assert(VM_Version::supports_avx(), "");
5485   InstructionMark im(this);
5486   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5487   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5488   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5489   emit_int8(0x5C);
5490   emit_operand(dst, src);
5491 }
5492 
5493 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5494   assert(VM_Version::supports_avx(), "");
5495   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5496   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5497   emit_int8(0x5C);
5498   emit_int8((unsigned char)(0xC0 | encode));
5499 }
5500 
5501 //====================VECTOR ARITHMETIC=====================================
5502 
// Floating-point vector arithmetic
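//
// Illustrative encoding: on a pre-AVX CPU, addpd(xmm0, xmm1) below is
// emitted in its legacy SSE2 form 66 0F 58 C1; with AVX enabled the same
// call is VEX-encoded instead.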
5504 
5505 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
5506   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5507   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5508   attributes.set_rex_vex_w_reverted();
5509   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5510   emit_int8(0x58);
5511   emit_int8((unsigned char)(0xC0 | encode));
5512 }
5513 
5514 void Assembler::addpd(XMMRegister dst, Address src) {
5515   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5516   InstructionMark im(this);
5517   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5518   attributes.set_rex_vex_w_reverted();
5519   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5520   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5521   emit_int8(0x58);
5522   emit_operand(dst, src);
5523 }
5526 void Assembler::addps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "")); // addps is an SSE1 instruction
5528   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5529   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5530   emit_int8(0x58);
5531   emit_int8((unsigned char)(0xC0 | encode));
5532 }
5533 
5534 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5535   assert(VM_Version::supports_avx(), "");
5536   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5537   attributes.set_rex_vex_w_reverted();
5538   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5539   emit_int8(0x58);
5540   emit_int8((unsigned char)(0xC0 | encode));
5541 }
5542 
5543 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5544   assert(VM_Version::supports_avx(), "");
5545   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5546   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5547   emit_int8(0x58);
5548   emit_int8((unsigned char)(0xC0 | encode));
5549 }
5550 
5551 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5552   assert(VM_Version::supports_avx(), "");
5553   InstructionMark im(this);
5554   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5555   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5556   attributes.set_rex_vex_w_reverted();
5557   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5558   emit_int8(0x58);
5559   emit_operand(dst, src);
5560 }
5561 
5562 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5563   assert(VM_Version::supports_avx(), "");
5564   InstructionMark im(this);
5565   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5566   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5567   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5568   emit_int8(0x58);
5569   emit_operand(dst, src);
5570 }
5571 
5572 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
5573   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5574   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5575   attributes.set_rex_vex_w_reverted();
5576   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5577   emit_int8(0x5C);
5578   emit_int8((unsigned char)(0xC0 | encode));
5579 }
5580 
5581 void Assembler::subps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "")); // subps is an SSE1 instruction
5583   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5584   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5585   emit_int8(0x5C);
5586   emit_int8((unsigned char)(0xC0 | encode));
5587 }
5588 
5589 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5590   assert(VM_Version::supports_avx(), "");
5591   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5592   attributes.set_rex_vex_w_reverted();
5593   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5594   emit_int8(0x5C);
5595   emit_int8((unsigned char)(0xC0 | encode));
5596 }
5597 
5598 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5599   assert(VM_Version::supports_avx(), "");
5600   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5601   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5602   emit_int8(0x5C);
5603   emit_int8((unsigned char)(0xC0 | encode));
5604 }
5605 
5606 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5607   assert(VM_Version::supports_avx(), "");
5608   InstructionMark im(this);
5609   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5610   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5611   attributes.set_rex_vex_w_reverted();
5612   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5613   emit_int8(0x5C);
5614   emit_operand(dst, src);
5615 }
5616 
5617 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5618   assert(VM_Version::supports_avx(), "");
5619   InstructionMark im(this);
5620   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5621   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5622   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5623   emit_int8(0x5C);
5624   emit_operand(dst, src);
5625 }
5626 
5627 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
5628   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5629   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5630   attributes.set_rex_vex_w_reverted();
5631   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5632   emit_int8(0x59);
5633   emit_int8((unsigned char)(0xC0 | encode));
5634 }
5635 
5636 void Assembler::mulpd(XMMRegister dst, Address src) {
5637   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5638   InstructionMark im(this);
5639   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5640   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5641   attributes.set_rex_vex_w_reverted();
5642   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5643   emit_int8(0x59);
5644   emit_operand(dst, src);
5645 }
5646 
5647 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "")); // mulps is an SSE1 instruction
5649   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5650   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5651   emit_int8(0x59);
5652   emit_int8((unsigned char)(0xC0 | encode));
5653 }
5654 
5655 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5656   assert(VM_Version::supports_avx(), "");
5657   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5658   attributes.set_rex_vex_w_reverted();
5659   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5660   emit_int8(0x59);
5661   emit_int8((unsigned char)(0xC0 | encode));
5662 }
5663 
5664 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5665   assert(VM_Version::supports_avx(), "");
5666   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5667   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5668   emit_int8(0x59);
5669   emit_int8((unsigned char)(0xC0 | encode));
5670 }
5671 
5672 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5673   assert(VM_Version::supports_avx(), "");
5674   InstructionMark im(this);
5675   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5676   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5677   attributes.set_rex_vex_w_reverted();
5678   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5679   emit_int8(0x59);
5680   emit_operand(dst, src);
5681 }
5682 
5683 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5684   assert(VM_Version::supports_avx(), "");
5685   InstructionMark im(this);
5686   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5687   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5688   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5689   emit_int8(0x59);
5690   emit_operand(dst, src);
5691 }
5692 
5693 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5694   assert(VM_Version::supports_fma(), "");
5695   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5696   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5697   emit_int8((unsigned char)0xB8);
5698   emit_int8((unsigned char)(0xC0 | encode));
5699 }
5700 
5701 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5702   assert(VM_Version::supports_fma(), "");
5703   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5704   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5705   emit_int8((unsigned char)0xB8);
5706   emit_int8((unsigned char)(0xC0 | encode));
5707 }
5708 
5709 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5710   assert(VM_Version::supports_fma(), "");
5711   InstructionMark im(this);
5712   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5713   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5714   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5715   emit_int8((unsigned char)0xB8);
5716   emit_operand(dst, src2);
5717 }
5718 
5719 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5720   assert(VM_Version::supports_fma(), "");
5721   InstructionMark im(this);
5722   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5723   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5724   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5725   emit_int8((unsigned char)0xB8);
5726   emit_operand(dst, src2);
5727 }
5728 
5729 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
5730   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5731   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5732   attributes.set_rex_vex_w_reverted();
5733   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5734   emit_int8(0x5E);
5735   emit_int8((unsigned char)(0xC0 | encode));
5736 }
5737 
5738 void Assembler::divps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "")); // divps is an SSE1 instruction
5740   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5741   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5742   emit_int8(0x5E);
5743   emit_int8((unsigned char)(0xC0 | encode));
5744 }
5745 
5746 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5747   assert(VM_Version::supports_avx(), "");
5748   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5749   attributes.set_rex_vex_w_reverted();
5750   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5751   emit_int8(0x5E);
5752   emit_int8((unsigned char)(0xC0 | encode));
5753 }
5754 
5755 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5756   assert(VM_Version::supports_avx(), "");
5757   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5758   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5759   emit_int8(0x5E);
5760   emit_int8((unsigned char)(0xC0 | encode));
5761 }
5762 
5763 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5764   assert(VM_Version::supports_avx(), "");
5765   InstructionMark im(this);
5766   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5767   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5768   attributes.set_rex_vex_w_reverted();
5769   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5770   emit_int8(0x5E);
5771   emit_operand(dst, src);
5772 }
5773 
5774 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5775   assert(VM_Version::supports_avx(), "");
5776   InstructionMark im(this);
5777   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5778   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5779   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5780   emit_int8(0x5E);
5781   emit_operand(dst, src);
5782 }
5783 
5784 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
5785   assert(VM_Version::supports_avx(), "");
5786   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5787   attributes.set_rex_vex_w_reverted();
5788   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5789   emit_int8(0x51);
5790   emit_int8((unsigned char)(0xC0 | encode));
5791 }
5792 
5793 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
5794   assert(VM_Version::supports_avx(), "");
5795   InstructionMark im(this);
5796   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5797   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5798   attributes.set_rex_vex_w_reverted();
5799   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5800   emit_int8(0x51);
5801   emit_operand(dst, src);
5802 }
5803 
5804 void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
5805   assert(VM_Version::supports_avx(), "");
5806   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5807   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5808   emit_int8(0x51);
5809   emit_int8((unsigned char)(0xC0 | encode));
5810 }
5811 
5812 void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
5813   assert(VM_Version::supports_avx(), "");
5814   InstructionMark im(this);
5815   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5817   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5818   emit_int8(0x51);
5819   emit_operand(dst, src);
5820 }
5821 
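// Under EVEX, the packed bitwise ops below (andps/andpd and their xor
// counterparts) exist only on AVX512DQ parts, so _legacy_mode_dq keeps
// them VEX-encoded when DQ is not available.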
5822 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
5823   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5824   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5825   attributes.set_rex_vex_w_reverted();
5826   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5827   emit_int8(0x54);
5828   emit_int8((unsigned char)(0xC0 | encode));
5829 }
5830 
5831 void Assembler::andps(XMMRegister dst, XMMRegister src) {
5832   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5833   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5834   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5835   emit_int8(0x54);
5836   emit_int8((unsigned char)(0xC0 | encode));
5837 }
5838 
5839 void Assembler::andps(XMMRegister dst, Address src) {
5840   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5841   InstructionMark im(this);
5842   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5843   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5844   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5845   emit_int8(0x54);
5846   emit_operand(dst, src);
5847 }
5848 
5849 void Assembler::andpd(XMMRegister dst, Address src) {
5850   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5851   InstructionMark im(this);
5852   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5853   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5854   attributes.set_rex_vex_w_reverted();
5855   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5856   emit_int8(0x54);
5857   emit_operand(dst, src);
5858 }
5859 
5860 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5861   assert(VM_Version::supports_avx(), "");
5862   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5863   attributes.set_rex_vex_w_reverted();
5864   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5865   emit_int8(0x54);
5866   emit_int8((unsigned char)(0xC0 | encode));
5867 }
5868 
5869 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5870   assert(VM_Version::supports_avx(), "");
5871   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5872   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5873   emit_int8(0x54);
5874   emit_int8((unsigned char)(0xC0 | encode));
5875 }
5876 
5877 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5878   assert(VM_Version::supports_avx(), "");
5879   InstructionMark im(this);
5880   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5881   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5882   attributes.set_rex_vex_w_reverted();
5883   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5884   emit_int8(0x54);
5885   emit_operand(dst, src);
5886 }
5887 
5888 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5889   assert(VM_Version::supports_avx(), "");
5890   InstructionMark im(this);
5891   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5892   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5893   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5894   emit_int8(0x54);
5895   emit_operand(dst, src);
5896 }
5897 
5898 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
5899   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5900   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5901   attributes.set_rex_vex_w_reverted();
5902   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5903   emit_int8(0x15);
5904   emit_int8((unsigned char)(0xC0 | encode));
5905 }
5906 
5907 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
5908   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5909   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5910   attributes.set_rex_vex_w_reverted();
5911   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5912   emit_int8(0x14);
5913   emit_int8((unsigned char)(0xC0 | encode));
5914 }
5915 
5916 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
5917   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5918   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5919   attributes.set_rex_vex_w_reverted();
5920   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5921   emit_int8(0x57);
5922   emit_int8((unsigned char)(0xC0 | encode));
5923 }
5924 
5925 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
5926   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5927   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5928   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5929   emit_int8(0x57);
5930   emit_int8((unsigned char)(0xC0 | encode));
5931 }
5932 
5933 void Assembler::xorpd(XMMRegister dst, Address src) {
5934   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5935   InstructionMark im(this);
5936   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5937   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5938   attributes.set_rex_vex_w_reverted();
5939   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5940   emit_int8(0x57);
5941   emit_operand(dst, src);
5942 }
5943 
5944 void Assembler::xorps(XMMRegister dst, Address src) {
5945   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5946   InstructionMark im(this);
5947   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5948   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5949   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5950   emit_int8(0x57);
5951   emit_operand(dst, src);
5952 }
5953 
5954 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5955   assert(VM_Version::supports_avx(), "");
5956   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5957   attributes.set_rex_vex_w_reverted();
5958   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5959   emit_int8(0x57);
5960   emit_int8((unsigned char)(0xC0 | encode));
5961 }
5962 
5963 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5964   assert(VM_Version::supports_avx(), "");
5965   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5966   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5967   emit_int8(0x57);
5968   emit_int8((unsigned char)(0xC0 | encode));
5969 }
5970 
5971 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5972   assert(VM_Version::supports_avx(), "");
5973   InstructionMark im(this);
5974   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5975   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5976   attributes.set_rex_vex_w_reverted();
5977   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5978   emit_int8(0x57);
5979   emit_operand(dst, src);
5980 }
5981 
5982 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5983   assert(VM_Version::supports_avx(), "");
5984   InstructionMark im(this);
5985   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5986   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5987   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5988   emit_int8(0x57);
5989   emit_operand(dst, src);
5990 }
5991 
5992 // Integer vector arithmetic
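//
// Illustrative encoding: paddd(xmm0, xmm1) in legacy SSE2 form is
// 66 0F FE C1. The horizontal adds below (phaddw/phaddd and their VEX
// forms) have no EVEX encoding, which is why they pass legacy_mode = true.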
5993 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
5996   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5997   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5998   emit_int8(0x01);
5999   emit_int8((unsigned char)(0xC0 | encode));
6000 }
6001 
6002 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
6005   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6006   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6007   emit_int8(0x02);
6008   emit_int8((unsigned char)(0xC0 | encode));
6009 }
6010 
6011 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
6012   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6013   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6014   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6015   emit_int8((unsigned char)0xFC);
6016   emit_int8((unsigned char)(0xC0 | encode));
6017 }
6018 
6019 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
6020   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6021   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6022   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6023   emit_int8((unsigned char)0xFD);
6024   emit_int8((unsigned char)(0xC0 | encode));
6025 }
6026 
6027 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
6028   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6029   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6030   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6031   emit_int8((unsigned char)0xFE);
6032   emit_int8((unsigned char)(0xC0 | encode));
6033 }
6034 
6035 void Assembler::paddd(XMMRegister dst, Address src) {
6036   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6037   InstructionMark im(this);
6038   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6039   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6040   emit_int8((unsigned char)0xFE);
6041   emit_operand(dst, src);
6042 }
6043 
6044 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
6045   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6046   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6047   attributes.set_rex_vex_w_reverted();
6048   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6049   emit_int8((unsigned char)0xD4);
6050   emit_int8((unsigned char)(0xC0 | encode));
6051 }
6052 
6053 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
6054   assert(VM_Version::supports_sse3(), "");
6055   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
6056   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6057   emit_int8(0x01);
6058   emit_int8((unsigned char)(0xC0 | encode));
6059 }
6060 
6061 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
6062   assert(VM_Version::supports_sse3(), "");
6063   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6064   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6065   emit_int8(0x02);
6066   emit_int8((unsigned char)(0xC0 | encode));
6067 }
6068 
6069 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6070   assert(UseAVX > 0, "requires some form of AVX");
6071   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6072   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6073   emit_int8((unsigned char)0xFC);
6074   emit_int8((unsigned char)(0xC0 | encode));
6075 }
6076 
6077 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6078   assert(UseAVX > 0, "requires some form of AVX");
6079   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6080   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6081   emit_int8((unsigned char)0xFD);
6082   emit_int8((unsigned char)(0xC0 | encode));
6083 }
6084 
6085 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6086   assert(UseAVX > 0, "requires some form of AVX");
6087   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6088   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6089   emit_int8((unsigned char)0xFE);
6090   emit_int8((unsigned char)(0xC0 | encode));
6091 }
6092 
6093 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6094   assert(UseAVX > 0, "requires some form of AVX");
6095   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6096   attributes.set_rex_vex_w_reverted();
6097   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6098   emit_int8((unsigned char)0xD4);
6099   emit_int8((unsigned char)(0xC0 | encode));
6100 }
6101 
6102 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6103   assert(UseAVX > 0, "requires some form of AVX");
6104   InstructionMark im(this);
6105   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6106   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6107   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6108   emit_int8((unsigned char)0xFC);
6109   emit_operand(dst, src);
6110 }
6111 
6112 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6113   assert(UseAVX > 0, "requires some form of AVX");
6114   InstructionMark im(this);
6115   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6116   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6117   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6118   emit_int8((unsigned char)0xFD);
6119   emit_operand(dst, src);
6120 }
6121 
6122 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6123   assert(UseAVX > 0, "requires some form of AVX");
6124   InstructionMark im(this);
6125   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6126   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6127   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6128   emit_int8((unsigned char)0xFE);
6129   emit_operand(dst, src);
6130 }
6131 
6132 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6133   assert(UseAVX > 0, "requires some form of AVX");
6134   InstructionMark im(this);
6135   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6136   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6137   attributes.set_rex_vex_w_reverted();
6138   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6139   emit_int8((unsigned char)0xD4);
6140   emit_operand(dst, src);
6141 }
6142 
6143 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
6144   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6145   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6146   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6147   emit_int8((unsigned char)0xF8);
6148   emit_int8((unsigned char)(0xC0 | encode));
6149 }
6150 
6151 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
6152   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6153   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6154   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6155   emit_int8((unsigned char)0xF9);
6156   emit_int8((unsigned char)(0xC0 | encode));
6157 }
6158 
void Assembler::psubd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6161   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6162   emit_int8((unsigned char)0xFA);
6163   emit_int8((unsigned char)(0xC0 | encode));
6164 }
6165 
6166 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
6167   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6168   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6169   attributes.set_rex_vex_w_reverted();
6170   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6171   emit_int8((unsigned char)0xFB);
6172   emit_int8((unsigned char)(0xC0 | encode));
6173 }
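
// The SSE subtract forms above are destructive (dst = dst - src), which is
// why dst is passed to simd_prefix_and_encode() as both destination and
// first source. The VEX/EVEX forms below take an explicit non-destructive
// source register, nds.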
6174 
6175 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6176   assert(UseAVX > 0, "requires some form of AVX");
6177   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6178   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6179   emit_int8((unsigned char)0xF8);
6180   emit_int8((unsigned char)(0xC0 | encode));
6181 }
6182 
6183 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6184   assert(UseAVX > 0, "requires some form of AVX");
6185   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6186   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6187   emit_int8((unsigned char)0xF9);
6188   emit_int8((unsigned char)(0xC0 | encode));
6189 }
6190 
6191 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6192   assert(UseAVX > 0, "requires some form of AVX");
6193   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6194   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6195   emit_int8((unsigned char)0xFA);
6196   emit_int8((unsigned char)(0xC0 | encode));
6197 }
6198 
6199 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6200   assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6202   attributes.set_rex_vex_w_reverted();
6203   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6204   emit_int8((unsigned char)0xFB);
6205   emit_int8((unsigned char)(0xC0 | encode));
6206 }
6207 
6208 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6209   assert(UseAVX > 0, "requires some form of AVX");
6210   InstructionMark im(this);
6211   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6212   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6213   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6214   emit_int8((unsigned char)0xF8);
6215   emit_operand(dst, src);
6216 }
6217 
6218 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6219   assert(UseAVX > 0, "requires some form of AVX");
6220   InstructionMark im(this);
6221   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6222   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6223   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6224   emit_int8((unsigned char)0xF9);
6225   emit_operand(dst, src);
6226 }
6227 
6228 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6229   assert(UseAVX > 0, "requires some form of AVX");
6230   InstructionMark im(this);
6231   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6232   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6233   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6234   emit_int8((unsigned char)0xFA);
6235   emit_operand(dst, src);
6236 }
6237 
6238 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6239   assert(UseAVX > 0, "requires some form of AVX");
6240   InstructionMark im(this);
6241   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6242   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6243   attributes.set_rex_vex_w_reverted();
6244   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6245   emit_int8((unsigned char)0xFB);
6246   emit_operand(dst, src);
6247 }
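
// The Address-taking forms open an InstructionMark and record EVEX address
// attributes (tuple type and input size) so that, when the instruction is
// EVEX-encoded, the memory displacement can be compressed into the scaled
// 8-bit disp8 form.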
6248 
6249 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
6250   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6251   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6252   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6253   emit_int8((unsigned char)0xD5);
6254   emit_int8((unsigned char)(0xC0 | encode));
6255 }
6256 
6257 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
6258   assert(VM_Version::supports_sse4_1(), "");
6259   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6260   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6261   emit_int8(0x40);
6262   emit_int8((unsigned char)(0xC0 | encode));
6263 }
6264 
6265 void Assembler::pmuludq(XMMRegister dst, XMMRegister src) {
6266   assert(VM_Version::supports_sse2(), "");
6267   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6268   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6269   emit_int8((unsigned char)(0xF4));
6270   emit_int8((unsigned char)(0xC0 | encode));
6271 }
6272 
6273 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6274   assert(UseAVX > 0, "requires some form of AVX");
6275   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6276   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6277   emit_int8((unsigned char)0xD5);
6278   emit_int8((unsigned char)(0xC0 | encode));
6279 }
6280 
6281 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6282   assert(UseAVX > 0, "requires some form of AVX");
6283   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6284   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6285   emit_int8(0x40);
6286   emit_int8((unsigned char)(0xC0 | encode));
6287 }
6288 
6289 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6290   assert(UseAVX > 2, "requires some form of EVEX");
6291   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
6292   attributes.set_is_evex_instruction();
6293   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6294   emit_int8(0x40);
6295   emit_int8((unsigned char)(0xC0 | encode));
6296 }
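
// VPMULLQ (the EVEX.66.0F38.W1 40 /r forms) is an AVX-512DQ instruction with
// no VEX form, hence the UseAVX > 2 guard, set_is_evex_instruction(), and
// the _legacy_mode_dq attribute on the vpmullq forms.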
6297 
6298 void Assembler::vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6299   assert(UseAVX > 0, "requires some form of AVX");
6300   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6301   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6302   emit_int8((unsigned char)(0xF4));
6303   emit_int8((unsigned char)(0xC0 | encode));
6304 }
6305 
6306 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6307   assert(UseAVX > 0, "requires some form of AVX");
6308   InstructionMark im(this);
6309   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6310   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6311   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6312   emit_int8((unsigned char)0xD5);
6313   emit_operand(dst, src);
6314 }
6315 
6316 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6317   assert(UseAVX > 0, "requires some form of AVX");
6318   InstructionMark im(this);
6319   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6320   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6321   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6322   emit_int8(0x40);
6323   emit_operand(dst, src);
6324 }
6325 
6326 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6327   assert(UseAVX > 2, "requires some form of EVEX");
6328   InstructionMark im(this);
6329   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
6330   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6331   attributes.set_is_evex_instruction();
6332   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6333   emit_int8(0x40);
6334   emit_operand(dst, src);
6335 }
6336 
6337 // Min, max
6338 void Assembler::pminsb(XMMRegister dst, XMMRegister src) {
6339   assert(VM_Version::supports_sse4_1(), "");
6340   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
6341   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6342   emit_int8(0x38);
6343   emit_int8((unsigned char)(0xC0 | encode));
6344 }
6345 
6346 void Assembler::vpminsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6347   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6348         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6349   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
6350   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x38);
6352   emit_int8((unsigned char)(0xC0 | encode));
6353 }
6354 
6355 void Assembler::pminsw(XMMRegister dst, XMMRegister src) {
6356   assert(VM_Version::supports_sse2(), "");
6357   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
6358   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6359   emit_int8((unsigned char)0xEA);
6360   emit_int8((unsigned char)(0xC0 | encode));
6361 }
6362 
6363 void Assembler::vpminsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6364   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6365         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6366   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
6367   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6368   emit_int8((unsigned char)0xEA);
6369   emit_int8((unsigned char)(0xC0 | encode));
6370 }
6371 
6372 void Assembler::pminsd(XMMRegister dst, XMMRegister src) {
6373   assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
6375   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6376   emit_int8(0x39);
6377   emit_int8((unsigned char)(0xC0 | encode));
6378 }
6379 
6380 void Assembler::vpminsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6381   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6382         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6384   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6385   emit_int8(0x39);
6386   emit_int8((unsigned char)(0xC0 | encode));
6387 }
6388 
6389 void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6390   assert(UseAVX > 2, "requires AVX512F");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
6392   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6393   emit_int8(0x39);
6394   emit_int8((unsigned char)(0xC0 | encode));
6395 }
6396 
6397 void Assembler::minps(XMMRegister dst, XMMRegister src) {
6398   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6399   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6400   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6401   emit_int8(0x5D);
6402   emit_int8((unsigned char)(0xC0 | encode));
6403 }

void Assembler::vminps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6407   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6408   emit_int8(0x5D);
6409   emit_int8((unsigned char)(0xC0 | encode));
6410 }
6411 
6412 void Assembler::minpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6414   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6415   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6416   emit_int8(0x5D);
6417   emit_int8((unsigned char)(0xC0 | encode));
6418 }

void Assembler::vminpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6423   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6424   emit_int8(0x5D);
6425   emit_int8((unsigned char)(0xC0 | encode));
6426 }
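
// Note that the packed FP min/max instructions are not symmetric in their
// operands: if the values compare unordered (a NaN is present), or if both
// are zeros of either sign, the second (source) operand is returned.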
6427 
6428 void Assembler::pmaxsb(XMMRegister dst, XMMRegister src) {
6429   assert(VM_Version::supports_sse4_1(), "");
6430   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
6431   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6432   emit_int8(0x3C);
6433   emit_int8((unsigned char)(0xC0 | encode));
6434 }
6435 
6436 void Assembler::vpmaxsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6437   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6438         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6439   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
6440   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6441   emit_int8(0x3C);
6442   emit_int8((unsigned char)(0xC0 | encode));
6443 }
6444 
6445 void Assembler::pmaxsw(XMMRegister dst, XMMRegister src) {
6446   assert(VM_Version::supports_sse2(), "");
6447   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
6448   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6449   emit_int8((unsigned char)0xEE);
6450   emit_int8((unsigned char)(0xC0 | encode));
6451 }
6452 
6453 void Assembler::vpmaxsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6454   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6455         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6456   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
6457   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6458   emit_int8((unsigned char)0xEE);
6459   emit_int8((unsigned char)(0xC0 | encode));
6460 }
6461 
6462 void Assembler::pmaxsd(XMMRegister dst, XMMRegister src) {
6463   assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
6465   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6466   emit_int8(0x3D);
6467   emit_int8((unsigned char)(0xC0 | encode));
6468 }
6469 
6470 void Assembler::vpmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6471   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6472         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6474   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6475   emit_int8(0x3D);
6476   emit_int8((unsigned char)(0xC0 | encode));
6477 }
6478 
6479 void Assembler::vpmaxsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6480   assert(UseAVX > 2, "requires AVX512F");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
6482   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6483   emit_int8(0x3D);
6484   emit_int8((unsigned char)(0xC0 | encode));
6485 }
6486 
6487 void Assembler::maxps(XMMRegister dst, XMMRegister src) {
6488   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6489   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6490   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6491   emit_int8(0x5F);
6492   emit_int8((unsigned char)(0xC0 | encode));
6493 }
6494 
6495 void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6496   assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6498   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6499   emit_int8(0x5F);
6500   emit_int8((unsigned char)(0xC0 | encode));
6501 }
6502 
6503 void Assembler::maxpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6505   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6506   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6507   emit_int8(0x5F);
6508   emit_int8((unsigned char)(0xC0 | encode));
6509 }
6510 
6511 void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6512   assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6514   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6515   emit_int8(0x5F);
6516   emit_int8((unsigned char)(0xC0 | encode));
6517 }
6518 
6519 // Shift packed integers left by specified number of bits.
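// The shift-by-immediate opcodes (71/72/73) encode the operation in the
// ModRM reg field: /6 selects the left-shift group, so a fixed xmm6 is
// passed below purely to plant 6 in that field while dst serves as both
// source and destination. A sketch of intended use (register choice is
// illustrative only), assuming the usual '#define __ _masm->' shorthand:
//   __ psllw(xmm1, 3);   // 66 0F 71 /6 ib: each 16-bit lane of xmm1 <<= 3
//   __ pslld(xmm2, 1);   // 66 0F 72 /6 ib: each 32-bit lane of xmm2 <<= 1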
6520 void Assembler::psllw(XMMRegister dst, int shift) {
6521   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6522   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6523   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
6524   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6525   emit_int8(0x71);
6526   emit_int8((unsigned char)(0xC0 | encode));
6527   emit_int8(shift & 0xFF);
6528 }
6529 
6530 void Assembler::pslld(XMMRegister dst, int shift) {
6531   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6532   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6533   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
6534   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6535   emit_int8(0x72);
6536   emit_int8((unsigned char)(0xC0 | encode));
6537   emit_int8(shift & 0xFF);
6538 }
6539 
6540 void Assembler::psllq(XMMRegister dst, int shift) {
6541   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6542   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6543   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
6544   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6545   emit_int8(0x73);
6546   emit_int8((unsigned char)(0xC0 | encode));
6547   emit_int8(shift & 0xFF);
6548 }
6549 
6550 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
6551   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6552   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6553   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6554   emit_int8((unsigned char)0xF1);
6555   emit_int8((unsigned char)(0xC0 | encode));
6556 }
6557 
6558 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
6559   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6560   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6561   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6562   emit_int8((unsigned char)0xF2);
6563   emit_int8((unsigned char)(0xC0 | encode));
6564 }
6565 
6566 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
6567   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6568   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6569   attributes.set_rex_vex_w_reverted();
6570   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6571   emit_int8((unsigned char)0xF3);
6572   emit_int8((unsigned char)(0xC0 | encode));
6573 }
6574 
6575 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6576   assert(UseAVX > 0, "requires some form of AVX");
6577   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6578   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
6579   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6580   emit_int8(0x71);
6581   emit_int8((unsigned char)(0xC0 | encode));
6582   emit_int8(shift & 0xFF);
6583 }
6584 
6585 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6586   assert(UseAVX > 0, "requires some form of AVX");
6587   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6588   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6589   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
6590   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6591   emit_int8(0x72);
6592   emit_int8((unsigned char)(0xC0 | encode));
6593   emit_int8(shift & 0xFF);
6594 }
6595 
6596 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6597   assert(UseAVX > 0, "requires some form of AVX");
6598   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6599   attributes.set_rex_vex_w_reverted();
6600   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
6601   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6602   emit_int8(0x73);
6603   emit_int8((unsigned char)(0xC0 | encode));
6604   emit_int8(shift & 0xFF);
6605 }
6606 
6607 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6608   assert(UseAVX > 0, "requires some form of AVX");
6609   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6610   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6611   emit_int8((unsigned char)0xF1);
6612   emit_int8((unsigned char)(0xC0 | encode));
6613 }
6614 
6615 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6616   assert(UseAVX > 0, "requires some form of AVX");
6617   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6618   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6619   emit_int8((unsigned char)0xF2);
6620   emit_int8((unsigned char)(0xC0 | encode));
6621 }
6622 
6623 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6624   assert(UseAVX > 0, "requires some form of AVX");
6625   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6626   attributes.set_rex_vex_w_reverted();
6627   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6628   emit_int8((unsigned char)0xF3);
6629   emit_int8((unsigned char)(0xC0 | encode));
6630 }
6631 
6632 // Shift packed integers logically right by specified number of bits.
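// Same opcode bytes (71/72/73) as the left shifts; /2 selects the logical
// right-shift group, hence the fixed xmm2 operand below.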
6633 void Assembler::psrlw(XMMRegister dst, int shift) {
6634   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6635   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6636   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
6637   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6638   emit_int8(0x71);
6639   emit_int8((unsigned char)(0xC0 | encode));
6640   emit_int8(shift & 0xFF);
6641 }
6642 
6643 void Assembler::psrld(XMMRegister dst, int shift) {
6644   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6645   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6646   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
6647   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6648   emit_int8(0x72);
6649   emit_int8((unsigned char)(0xC0 | encode));
6650   emit_int8(shift & 0xFF);
6651 }
6652 
6653 void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse this with the psrldq SSE2 instruction, which
  // shifts the whole 128-bit value in an xmm register by a number of bytes.
6656   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6657   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6658   attributes.set_rex_vex_w_reverted();
6659   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
6660   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6661   emit_int8(0x73);
6662   emit_int8((unsigned char)(0xC0 | encode));
6663   emit_int8(shift & 0xFF);
6664 }
6665 
6666 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
6667   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6668   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6669   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6670   emit_int8((unsigned char)0xD1);
6671   emit_int8((unsigned char)(0xC0 | encode));
6672 }
6673 
6674 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
6675   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6676   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6677   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6678   emit_int8((unsigned char)0xD2);
6679   emit_int8((unsigned char)(0xC0 | encode));
6680 }
6681 
6682 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
6683   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6684   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6685   attributes.set_rex_vex_w_reverted();
6686   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6687   emit_int8((unsigned char)0xD3);
6688   emit_int8((unsigned char)(0xC0 | encode));
6689 }
6690 
6691 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6692   assert(UseAVX > 0, "requires some form of AVX");
6693   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6694   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
6695   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6696   emit_int8(0x71);
6697   emit_int8((unsigned char)(0xC0 | encode));
6698   emit_int8(shift & 0xFF);
6699 }
6700 
6701 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6702   assert(UseAVX > 0, "requires some form of AVX");
6703   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6704   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
6705   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6706   emit_int8(0x72);
6707   emit_int8((unsigned char)(0xC0 | encode));
6708   emit_int8(shift & 0xFF);
6709 }
6710 
6711 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6712   assert(UseAVX > 0, "requires some form of AVX");
6713   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6714   attributes.set_rex_vex_w_reverted();
6715   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
6716   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6717   emit_int8(0x73);
6718   emit_int8((unsigned char)(0xC0 | encode));
6719   emit_int8(shift & 0xFF);
6720 }
6721 
6722 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6723   assert(UseAVX > 0, "requires some form of AVX");
6724   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6725   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6726   emit_int8((unsigned char)0xD1);
6727   emit_int8((unsigned char)(0xC0 | encode));
6728 }
6729 
6730 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6731   assert(UseAVX > 0, "requires some form of AVX");
6732   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6733   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6734   emit_int8((unsigned char)0xD2);
6735   emit_int8((unsigned char)(0xC0 | encode));
6736 }
6737 
6738 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6739   assert(UseAVX > 0, "requires some form of AVX");
6740   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6741   attributes.set_rex_vex_w_reverted();
6742   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6743   emit_int8((unsigned char)0xD3);
6744   emit_int8((unsigned char)(0xC0 | encode));
6745 }
6746 
6747 // Shift packed integers arithmetically right by specified number of bits.
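// Only word and dword variants exist in this group: a packed arithmetic
// right shift of quadwords (vpsraq) was not introduced until AVX-512, so no
// psraq form appears here.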
6748 void Assembler::psraw(XMMRegister dst, int shift) {
6749   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6750   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6751   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6752   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6753   emit_int8(0x71);
6754   emit_int8((unsigned char)(0xC0 | encode));
6755   emit_int8(shift & 0xFF);
6756 }
6757 
6758 void Assembler::psrad(XMMRegister dst, int shift) {
6759   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6760   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6761   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6762   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6763   emit_int8(0x72);
6764   emit_int8((unsigned char)(0xC0 | encode));
6765   emit_int8(shift & 0xFF);
6766 }
6767 
6768 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
6769   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6770   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6771   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6772   emit_int8((unsigned char)0xE1);
6773   emit_int8((unsigned char)(0xC0 | encode));
6774 }
6775 
6776 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
6777   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6778   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6779   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6780   emit_int8((unsigned char)0xE2);
6781   emit_int8((unsigned char)(0xC0 | encode));
6782 }
6783 
6784 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6785   assert(UseAVX > 0, "requires some form of AVX");
6786   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6787   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6788   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6789   emit_int8(0x71);
6790   emit_int8((unsigned char)(0xC0 | encode));
6791   emit_int8(shift & 0xFF);
6792 }
6793 
6794 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6795   assert(UseAVX > 0, "requires some form of AVX");
6796   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6798   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6799   emit_int8(0x72);
6800   emit_int8((unsigned char)(0xC0 | encode));
6801   emit_int8(shift & 0xFF);
6802 }
6803 
6804 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6805   assert(UseAVX > 0, "requires some form of AVX");
6806   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6807   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6808   emit_int8((unsigned char)0xE1);
6809   emit_int8((unsigned char)(0xC0 | encode));
6810 }
6811 
6812 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6813   assert(UseAVX > 0, "requires some form of AVX");
6814   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6815   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6816   emit_int8((unsigned char)0xE2);
6817   emit_int8((unsigned char)(0xC0 | encode));
6818 }
6819 
// Variable shift packed integers logically left.
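// Unlike the fixed-count shifts above, these AVX2 forms shift each element
// of src by the count held in the corresponding element of `shift`.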
6821 void Assembler::vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6822   assert(UseAVX > 1, "requires AVX2");
6823   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6824   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6825   emit_int8(0x47);
6826   emit_int8((unsigned char)(0xC0 | encode));
6827 }
6828 
6829 void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6830   assert(UseAVX > 1, "requires AVX2");
6831   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6832   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6833   emit_int8(0x47);
6834   emit_int8((unsigned char)(0xC0 | encode));
6835 }
6836 
// Variable shift packed integers logically right.
6838 void Assembler::vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6839   assert(UseAVX > 1, "requires AVX2");
6840   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6841   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6842   emit_int8(0x45);
6843   emit_int8((unsigned char)(0xC0 | encode));
6844 }
6845 
6846 void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6847   assert(UseAVX > 1, "requires AVX2");
6848   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6849   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6850   emit_int8(0x45);
6851   emit_int8((unsigned char)(0xC0 | encode));
6852 }
6853 
// Variable shift packed integers arithmetically right.
6855 void Assembler::vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6856   assert(UseAVX > 1, "requires AVX2");
6857   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6858   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6859   emit_int8(0x46);
6860   emit_int8((unsigned char)(0xC0 | encode));
6861 }
6862 
6863 void Assembler::vpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 2, "requires AVX512");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
6866   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6867   emit_int8(0x46);
6868   emit_int8((unsigned char)(0xC0 | encode));
6869 }
6870 
// Logical operations on packed integers.
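// A common idiom in the code generators is zeroing a register with
// pxor(reg, reg), since x ^ x == 0 regardless of the previous contents; the
// same applies to vpxor with all three operands equal.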
6872 void Assembler::pand(XMMRegister dst, XMMRegister src) {
6873   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6874   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6875   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6876   emit_int8((unsigned char)0xDB);
6877   emit_int8((unsigned char)(0xC0 | encode));
6878 }
6879 
6880 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6881   assert(UseAVX > 0, "requires some form of AVX");
6882   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6883   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6884   emit_int8((unsigned char)0xDB);
6885   emit_int8((unsigned char)(0xC0 | encode));
6886 }
6887 
6888 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6889   assert(UseAVX > 0, "requires some form of AVX");
6890   InstructionMark im(this);
6891   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6892   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6893   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6894   emit_int8((unsigned char)0xDB);
6895   emit_operand(dst, src);
6896 }
6897 
6898 void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
6899   assert(VM_Version::supports_evex(), "");
6900   // Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r
6901   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6902   attributes.set_is_evex_instruction();
6903   attributes.set_embedded_opmask_register_specifier(mask);
6904   if (merge) {
6905     attributes.reset_is_clear_context();
6906   }
6907   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6908   emit_int8((unsigned char)0xDB);
6909   emit_int8((unsigned char)(0xC0 | encode));
6910 }
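
// AVX-512 masking note for the masked (ev*) forms: EVEX.z picks between
// zero-masking (z = 1, destination elements whose mask bit is clear are
// zeroed) and merge-masking (z = 0, they keep their previous contents).
// reset_is_clear_context() drops the zeroing context, so merge == true
// selects the merge-masking encoding.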
6911 
6912 void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6913   assert(UseAVX > 2, "requires some form of EVEX");
6914   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6915   attributes.set_rex_vex_w_reverted();
6916   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6917   emit_int8((unsigned char)0xDB);
6918   emit_int8((unsigned char)(0xC0 | encode));
6919 }
6920 
6921 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
6922   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6924   attributes.set_rex_vex_w_reverted();
6925   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6926   emit_int8((unsigned char)0xDF);
6927   emit_int8((unsigned char)(0xC0 | encode));
6928 }
6929 
6930 void Assembler::por(XMMRegister dst, XMMRegister src) {
6931   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6933   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6934   emit_int8((unsigned char)0xEB);
6935   emit_int8((unsigned char)(0xC0 | encode));
6936 }
6937 
6938 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6939   assert(UseAVX > 0, "requires some form of AVX");
6940   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6941   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6942   emit_int8((unsigned char)0xEB);
6943   emit_int8((unsigned char)(0xC0 | encode));
6944 }
6945 
6946 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6947   assert(UseAVX > 0, "requires some form of AVX");
6948   InstructionMark im(this);
6949   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6950   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6951   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6952   emit_int8((unsigned char)0xEB);
6953   emit_operand(dst, src);
6954 }
6955 
6956 void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6957   assert(UseAVX > 2, "requires some form of EVEX");
6958   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6959   attributes.set_rex_vex_w_reverted();
6960   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6961   emit_int8((unsigned char)0xEB);
6962   emit_int8((unsigned char)(0xC0 | encode));
6963 }
6964 
6965 void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
6966   assert(VM_Version::supports_evex(), "");
6967   // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
6968   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6969   attributes.set_is_evex_instruction();
6970   attributes.set_embedded_opmask_register_specifier(mask);
6971   if (merge) {
6972     attributes.reset_is_clear_context();
6973   }
6974   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6975   emit_int8((unsigned char)0xEB);
6976   emit_int8((unsigned char)(0xC0 | encode));
6977 }
6978 
6979 void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
6980   assert(VM_Version::supports_evex(), "");
6981   // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
6982   InstructionMark im(this);
6983   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6985   attributes.set_is_evex_instruction();
6986   attributes.set_embedded_opmask_register_specifier(mask);
6987   if (merge) {
6988     attributes.reset_is_clear_context();
6989   }
6990   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6991   emit_int8((unsigned char)0xEB);
6992   emit_operand(dst, src);
6993 }
6994 
6995 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
6996   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6997   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6998   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6999   emit_int8((unsigned char)0xEF);
7000   emit_int8((unsigned char)(0xC0 | encode));
7001 }
7002 
7003 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
7004   assert(UseAVX > 0, "requires some form of AVX");
7005   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7006   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7007   emit_int8((unsigned char)0xEF);
7008   emit_int8((unsigned char)(0xC0 | encode));
7009 }
7010 
7011 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
7012   assert(UseAVX > 0, "requires some form of AVX");
7013   InstructionMark im(this);
7014   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7015   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
7016   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7017   emit_int8((unsigned char)0xEF);
7018   emit_operand(dst, src);
7019 }
7020 
7021 void Assembler::vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
7022   assert(UseAVX > 2, "requires some form of EVEX");
7023   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7024   attributes.set_rex_vex_w_reverted();
7025   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7026   emit_int8((unsigned char)0xEF);
7027   emit_int8((unsigned char)(0xC0 | encode));
7028 }
7029 
7030 void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
7031   assert(VM_Version::supports_evex(), "");
7032   // Encoding: EVEX.NDS.XXX.66.0F.W0 EF /r
7033   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7034   attributes.set_is_evex_instruction();
7035   attributes.set_embedded_opmask_register_specifier(mask);
7036   if (merge) {
7037     attributes.reset_is_clear_context();
7038   }
7039   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7040   emit_int8((unsigned char)0xEF);
7041   emit_int8((unsigned char)(0xC0 | encode));
7042 }
7043 
7044 // vinserti forms
7045 
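// In the insert forms below, imm8 selects which 128-bit (or, for the 64x4
// form, 256-bit) slot of dst receives src; unused imm8 bits are masked off
// before emission. A hypothetical use, filling the upper half of a 256-bit
// register (register names illustrative only):
//   __ vinserti128(dst, dst, src, 1);   // 0x01 = bits 255:128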
7046 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7047   assert(VM_Version::supports_avx2(), "");
7048   assert(imm8 <= 0x01, "imm8: %u", imm8);
7049   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
7050   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7051   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7052   emit_int8(0x38);
7053   emit_int8((unsigned char)(0xC0 | encode));
7054   // 0x00 - insert into lower 128 bits
7055   // 0x01 - insert into upper 128 bits
7056   emit_int8(imm8 & 0x01);
7057 }
7058 
7059 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7060   assert(VM_Version::supports_avx2(), "");
7061   assert(dst != xnoreg, "sanity");
7062   assert(imm8 <= 0x01, "imm8: %u", imm8);
7063   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
7064   InstructionMark im(this);
7065   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7066   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7067   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7068   emit_int8(0x38);
7069   emit_operand(dst, src);
7070   // 0x00 - insert into lower 128 bits
7071   // 0x01 - insert into upper 128 bits
7072   emit_int8(imm8 & 0x01);
7073 }
7074 
7075 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7076   assert(VM_Version::supports_evex(), "");
7077   assert(imm8 <= 0x03, "imm8: %u", imm8);
7078   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7079   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7080   emit_int8(0x38);
7081   emit_int8((unsigned char)(0xC0 | encode));
7082   // 0x00 - insert into q0 128 bits (0..127)
7083   // 0x01 - insert into q1 128 bits (128..255)
7084   // 0x02 - insert into q2 128 bits (256..383)
7085   // 0x03 - insert into q3 128 bits (384..511)
7086   emit_int8(imm8 & 0x03);
7087 }
7088 
7089 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7090   assert(VM_Version::supports_avx(), "");
7091   assert(dst != xnoreg, "sanity");
7092   assert(imm8 <= 0x03, "imm8: %u", imm8);
7093   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
7094   InstructionMark im(this);
7095   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7096   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7097   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x38);
7099   emit_operand(dst, src);
7100   // 0x00 - insert into q0 128 bits (0..127)
7101   // 0x01 - insert into q1 128 bits (128..255)
7102   // 0x02 - insert into q2 128 bits (256..383)
7103   // 0x03 - insert into q3 128 bits (384..511)
7104   emit_int8(imm8 & 0x03);
7105 }
7106 
7107 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7108   assert(VM_Version::supports_evex(), "");
7109   assert(imm8 <= 0x01, "imm8: %u", imm8);
7110   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7111   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3A);
7113   emit_int8((unsigned char)(0xC0 | encode));
7114   // 0x00 - insert into lower 256 bits
7115   // 0x01 - insert into upper 256 bits
7116   emit_int8(imm8 & 0x01);
7117 }
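// Opcode map 0F_3A lays the insert/extract family out symmetrically: 0x18/0x19 are
// the floating-point insert/extract at 128-bit granularity, 0x1A/0x1B the same at
// 256-bit granularity, and 0x38/0x39/0x3A/0x3B are the corresponding integer forms
// emitted above and below.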
7118 
7119 
7120 // vinsertf forms
7121 
7122 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7123   assert(VM_Version::supports_avx(), "");
7124   assert(imm8 <= 0x01, "imm8: %u", imm8);
7125   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
7126   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7127   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7128   emit_int8(0x18);
7129   emit_int8((unsigned char)(0xC0 | encode));
7130   // 0x00 - insert into lower 128 bits
7131   // 0x01 - insert into upper 128 bits
7132   emit_int8(imm8 & 0x01);
7133 }
7134 
7135 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7136   assert(VM_Version::supports_avx(), "");
7137   assert(dst != xnoreg, "sanity");
7138   assert(imm8 <= 0x01, "imm8: %u", imm8);
7139   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
7140   InstructionMark im(this);
7141   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7142   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7143   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7144   emit_int8(0x18);
7145   emit_operand(dst, src);
7146   // 0x00 - insert into lower 128 bits
7147   // 0x01 - insert into upper 128 bits
7148   emit_int8(imm8 & 0x01);
7149 }
7150 
7151 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7152   assert(VM_Version::supports_evex(), "");
7153   assert(imm8 <= 0x03, "imm8: %u", imm8);
7154   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7155   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7156   emit_int8(0x18);
7157   emit_int8((unsigned char)(0xC0 | encode));
7158   // 0x00 - insert into q0 128 bits (0..127)
7159   // 0x01 - insert into q1 128 bits (128..255)
7160   // 0x02 - insert into q2 128 bits (256..383)
7161   // 0x03 - insert into q3 128 bits (384..511)
7162   emit_int8(imm8 & 0x03);
7163 }
7164 
7165 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7166   assert(VM_Version::supports_avx(), "");
7167   assert(dst != xnoreg, "sanity");
7168   assert(imm8 <= 0x03, "imm8: %u", imm8);
7169   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
7170   InstructionMark im(this);
7171   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7172   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7173   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7174   emit_int8(0x18);
7175   emit_operand(dst, src);
7176   // 0x00 - insert into q0 128 bits (0..127)
7177   // 0x01 - insert into q1 128 bits (128..255)
7178   // 0x02 - insert into q2 128 bits (256..383)
7179   // 0x03 - insert into q3 128 bits (384..511)
7180   emit_int8(imm8 & 0x03);
7181 }
7182 
7183 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7184   assert(VM_Version::supports_evex(), "");
7185   assert(imm8 <= 0x01, "imm8: %u", imm8);
7186   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7187   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7188   emit_int8(0x1A);
7189   emit_int8((unsigned char)(0xC0 | encode));
7190   // 0x00 - insert into lower 256 bits
7191   // 0x01 - insert into upper 256 bits
7192   emit_int8(imm8 & 0x01);
7193 }
7194 
7195 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7196   assert(VM_Version::supports_evex(), "");
7197   assert(dst != xnoreg, "sanity");
7198   assert(imm8 <= 0x01, "imm8: %u", imm8);
7199   InstructionMark im(this);
7200   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7201   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
7202   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7203   emit_int8(0x1A);
7204   emit_operand(dst, src);
7205   // 0x00 - insert into lower 256 bits
7206   // 0x01 - insert into upper 256 bits
7207   emit_int8(imm8 & 0x01);
7208 }
7209 
7210 
7211 // vextracti forms
7212 
7213 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx2(), "");
7215   assert(imm8 <= 0x01, "imm8: %u", imm8);
7216   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
7217   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7218   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7219   emit_int8(0x39);
7220   emit_int8((unsigned char)(0xC0 | encode));
7221   // 0x00 - extract from lower 128 bits
7222   // 0x01 - extract from upper 128 bits
7223   emit_int8(imm8 & 0x01);
7224 }
7225 
7226 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
7227   assert(VM_Version::supports_avx2(), "");
7228   assert(src != xnoreg, "sanity");
7229   assert(imm8 <= 0x01, "imm8: %u", imm8);
7230   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
7231   InstructionMark im(this);
7232   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7233   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7234   attributes.reset_is_clear_context();
7235   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7236   emit_int8(0x39);
7237   emit_operand(src, dst);
7238   // 0x00 - extract from lower 128 bits
7239   // 0x01 - extract from upper 128 bits
7240   emit_int8(imm8 & 0x01);
7241 }
7242 
7243 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7244   assert(VM_Version::supports_avx(), "");
7245   assert(imm8 <= 0x03, "imm8: %u", imm8);
7246   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
7247   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7248   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7249   emit_int8(0x39);
7250   emit_int8((unsigned char)(0xC0 | encode));
7251   // 0x00 - extract from bits 127:0
7252   // 0x01 - extract from bits 255:128
7253   // 0x02 - extract from bits 383:256
7254   // 0x03 - extract from bits 511:384
7255   emit_int8(imm8 & 0x03);
7256 }
7257 
7258 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
7259   assert(VM_Version::supports_evex(), "");
7260   assert(src != xnoreg, "sanity");
7261   assert(imm8 <= 0x03, "imm8: %u", imm8);
7262   InstructionMark im(this);
7263   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7264   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7265   attributes.reset_is_clear_context();
7266   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7267   emit_int8(0x39);
7268   emit_operand(src, dst);
7269   // 0x00 - extract from bits 127:0
7270   // 0x01 - extract from bits 255:128
7271   // 0x02 - extract from bits 383:256
7272   // 0x03 - extract from bits 511:384
7273   emit_int8(imm8 & 0x03);
7274 }
7275 
7276 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7277   assert(VM_Version::supports_avx512dq(), "");
7278   assert(imm8 <= 0x03, "imm8: %u", imm8);
7279   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7280   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7281   emit_int8(0x39);
7282   emit_int8((unsigned char)(0xC0 | encode));
7283   // 0x00 - extract from bits 127:0
7284   // 0x01 - extract from bits 255:128
7285   // 0x02 - extract from bits 383:256
7286   // 0x03 - extract from bits 511:384
7287   emit_int8(imm8 & 0x03);
7288 }
7289 
7290 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7291   assert(VM_Version::supports_evex(), "");
7292   assert(imm8 <= 0x01, "imm8: %u", imm8);
7293   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7294   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7295   emit_int8(0x3B);
7296   emit_int8((unsigned char)(0xC0 | encode));
7297   // 0x00 - extract from lower 256 bits
7298   // 0x01 - extract from upper 256 bits
7299   emit_int8(imm8 & 0x01);
7300 }
7301 
7302 
7303 // vextractf forms
7304 
7305 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7306   assert(VM_Version::supports_avx(), "");
7307   assert(imm8 <= 0x01, "imm8: %u", imm8);
7308   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
7309   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7310   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7311   emit_int8(0x19);
7312   emit_int8((unsigned char)(0xC0 | encode));
7313   // 0x00 - extract from lower 128 bits
7314   // 0x01 - extract from upper 128 bits
7315   emit_int8(imm8 & 0x01);
7316 }
7317 
7318 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
7319   assert(VM_Version::supports_avx(), "");
7320   assert(src != xnoreg, "sanity");
7321   assert(imm8 <= 0x01, "imm8: %u", imm8);
7322   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
7323   InstructionMark im(this);
7324   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7325   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7326   attributes.reset_is_clear_context();
7327   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7328   emit_int8(0x19);
7329   emit_operand(src, dst);
7330   // 0x00 - extract from lower 128 bits
7331   // 0x01 - extract from upper 128 bits
7332   emit_int8(imm8 & 0x01);
7333 }
7334 
7335 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7336   assert(VM_Version::supports_avx(), "");
7337   assert(imm8 <= 0x03, "imm8: %u", imm8);
7338   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
7339   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7340   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7341   emit_int8(0x19);
7342   emit_int8((unsigned char)(0xC0 | encode));
7343   // 0x00 - extract from bits 127:0
7344   // 0x01 - extract from bits 255:128
7345   // 0x02 - extract from bits 383:256
7346   // 0x03 - extract from bits 511:384
7347   emit_int8(imm8 & 0x03);
7348 }
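// Note: when EVEX is unavailable this falls back to the AVX_256bit VEX encoding, where
// opcode 0x19 is plain vextractf128 -- an architecturally equivalent 128-bit extract --
// which appears to be why only supports_avx() is asserted here.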
7349 
7350 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
7351   assert(VM_Version::supports_evex(), "");
7352   assert(src != xnoreg, "sanity");
7353   assert(imm8 <= 0x03, "imm8: %u", imm8);
7354   InstructionMark im(this);
7355   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7356   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7357   attributes.reset_is_clear_context();
7358   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7359   emit_int8(0x19);
7360   emit_operand(src, dst);
7361   // 0x00 - extract from bits 127:0
7362   // 0x01 - extract from bits 255:128
7363   // 0x02 - extract from bits 383:256
7364   // 0x03 - extract from bits 511:384
7365   emit_int8(imm8 & 0x03);
7366 }
7367 
7368 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7369   assert(VM_Version::supports_avx512dq(), "");
7370   assert(imm8 <= 0x03, "imm8: %u", imm8);
7371   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7372   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7373   emit_int8(0x19);
7374   emit_int8((unsigned char)(0xC0 | encode));
7375   // 0x00 - extract from bits 127:0
7376   // 0x01 - extract from bits 255:128
7377   // 0x02 - extract from bits 383:256
7378   // 0x03 - extract from bits 511:384
7379   emit_int8(imm8 & 0x03);
7380 }
7381 
7382 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7383   assert(VM_Version::supports_evex(), "");
7384   assert(imm8 <= 0x01, "imm8: %u", imm8);
7385   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
7386   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7387   emit_int8(0x1B);
7388   emit_int8((unsigned char)(0xC0 | encode));
7389   // 0x00 - extract from lower 256 bits
7390   // 0x01 - extract from upper 256 bits
7391   emit_int8(imm8 & 0x01);
7392 }
7393 
7394 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
7395   assert(VM_Version::supports_evex(), "");
7396   assert(src != xnoreg, "sanity");
7397   assert(imm8 <= 0x01, "imm8: %u", imm8);
7398   InstructionMark im(this);
7399   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
7401   attributes.reset_is_clear_context();
7402   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7403   emit_int8(0x1B);
7404   emit_operand(src, dst);
7405   // 0x00 - extract from lower 256 bits
7406   // 0x01 - extract from upper 256 bits
7407   emit_int8(imm8 & 0x01);
7408 }
7409 
7410 
7411 // legacy word/dword replicate
7412 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
7413   assert(VM_Version::supports_avx2(), "");
7414   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7415   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7416   emit_int8(0x79);
7417   emit_int8((unsigned char)(0xC0 | encode));
7418 }
7419 
7420 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
7421   assert(VM_Version::supports_avx2(), "");
7422   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7423   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7424   emit_int8(0x58);
7425   emit_int8((unsigned char)(0xC0 | encode));
7426 }
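// Worked example (assuming a plain AVX2 machine): vpbroadcastd(xmm0, xmm1) encodes as
// VEX.256.66.0F38.W0 58 /r, i.e. c4 e2 7d 58 c1 -- the 0F_38 opcode map forces the
// 3-byte VEX form.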
7427 
7428 
7429 // xmm/mem sourced byte/word/dword/qword replicate
7430 
7431 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7432 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
7433   assert(VM_Version::supports_evex(), "");
7434   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7435   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7436   emit_int8(0x78);
7437   emit_int8((unsigned char)(0xC0 | encode));
7438 }
7439 
7440 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
7441   assert(VM_Version::supports_evex(), "");
7442   assert(dst != xnoreg, "sanity");
7443   InstructionMark im(this);
7444   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7445   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
7446   // swap src<->dst for encoding
7447   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7448   emit_int8(0x78);
7449   emit_operand(dst, src);
7450 }
7451 
7452 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7453 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
7454   assert(VM_Version::supports_evex(), "");
7455   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7456   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7457   emit_int8(0x79);
7458   emit_int8((unsigned char)(0xC0 | encode));
7459 }
7460 
7461 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
7462   assert(VM_Version::supports_evex(), "");
7463   assert(dst != xnoreg, "sanity");
7464   InstructionMark im(this);
7465   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7466   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
7467   // swap src<->dst for encoding
7468   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7469   emit_int8(0x79);
7470   emit_operand(dst, src);
7471 }
7472 
7473 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
7474 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
7475   assert(VM_Version::supports_evex(), "");
7476   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7477   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7478   emit_int8(0x58);
7479   emit_int8((unsigned char)(0xC0 | encode));
7480 }
7481 
7482 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
7483   assert(VM_Version::supports_evex(), "");
7484   assert(dst != xnoreg, "sanity");
7485   InstructionMark im(this);
7486   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7487   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7488   // swap src<->dst for encoding
7489   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7490   emit_int8(0x58);
7491   emit_operand(dst, src);
7492 }
7493 
7494 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
7495 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
7496   assert(VM_Version::supports_evex(), "");
7497   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7498   attributes.set_rex_vex_w_reverted();
7499   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7500   emit_int8(0x59);
7501   emit_int8((unsigned char)(0xC0 | encode));
7502 }
7503 
7504 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
7505   assert(VM_Version::supports_evex(), "");
7506   assert(dst != xnoreg, "sanity");
7507   InstructionMark im(this);
7508   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7509   attributes.set_rex_vex_w_reverted();
7510   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7511   // swap src<->dst for encoding
7512   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7513   emit_int8(0x59);
7514   emit_operand(dst, src);
7515 }
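// The tuple type and input size recorded above feed the EVEX disp8*N compressed
// displacement logic: a one-byte displacement is implicitly scaled by the operand's
// memory footprint (see tuple_table near the top of this file) -- 8 bytes for a T1S
// qword input, so e.g. a displacement of 64 still fits in the single byte 8.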
7516 
7517 
7518 // scalar single/double precision replicate
7519 
7520 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
7521 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
7522   assert(VM_Version::supports_evex(), "");
7523   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7524   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7525   emit_int8(0x18);
7526   emit_int8((unsigned char)(0xC0 | encode));
7527 }
7528 
7529 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
7530   assert(VM_Version::supports_evex(), "");
7531   assert(dst != xnoreg, "sanity");
7532   InstructionMark im(this);
7533   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7534   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7535   // swap src<->dst for encoding
7536   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7537   emit_int8(0x18);
7538   emit_operand(dst, src);
7539 }
7540 
7541 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
7542 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
7543   assert(VM_Version::supports_evex(), "");
7544   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7545   attributes.set_rex_vex_w_reverted();
7546   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7547   emit_int8(0x19);
7548   emit_int8((unsigned char)(0xC0 | encode));
7549 }
7550 
7551 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
7552   assert(VM_Version::supports_evex(), "");
7553   assert(dst != xnoreg, "sanity");
7554   InstructionMark im(this);
7555   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7556   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7557   attributes.set_rex_vex_w_reverted();
7558   // swap src<->dst for encoding
7559   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7560   emit_int8(0x19);
7561   emit_operand(dst, src);
7562 }
7563 
7564 
7565 // gpr source broadcast forms
7566 
7567 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7568 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
7569   assert(VM_Version::supports_evex(), "");
7570   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7571   attributes.set_is_evex_instruction();
7572   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7573   emit_int8(0x7A);
7574   emit_int8((unsigned char)(0xC0 | encode));
7575 }
7576 
7577 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7578 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
7579   assert(VM_Version::supports_evex(), "");
7580   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7581   attributes.set_is_evex_instruction();
7582   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7583   emit_int8(0x7B);
7584   emit_int8((unsigned char)(0xC0 | encode));
7585 }
7586 
7587 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
7588 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
7589   assert(VM_Version::supports_evex(), "");
7590   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7591   attributes.set_is_evex_instruction();
7592   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7593   emit_int8(0x7C);
7594   emit_int8((unsigned char)(0xC0 | encode));
7595 }
7596 
7597 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
7598 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
7599   assert(VM_Version::supports_evex(), "");
7600   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7601   attributes.set_is_evex_instruction();
7602   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7603   emit_int8(0x7C);
7604   emit_int8((unsigned char)(0xC0 | encode));
7605 }
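// Note the gpr-sourced forms above use dedicated opcodes (7A/7B/7C for byte/word/dword);
// the qword variant reuses 7C and is distinguished only by EVEX.W = 1.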
7606 
7607 
7608 // Carry-Less Multiplication Quadword
7609 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
7610   assert(VM_Version::supports_clmul(), "");
7611   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7612   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7613   emit_int8(0x44);
7614   emit_int8((unsigned char)(0xC0 | encode));
7615   emit_int8((unsigned char)mask);
7616 }
7617 
7618 // Carry-Less Multiplication Quadword
7619 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
7620   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
7621   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7622   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7623   emit_int8(0x44);
7624   emit_int8((unsigned char)(0xC0 | encode));
7625   emit_int8((unsigned char)mask);
7626 }
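// For both pclmulqdq forms above, the immediate selects which 64-bit halves are
// multiplied: bit 0 picks the low/high qword of the first source and bit 4 that of
// the second, so 0x00 multiplies the two low qwords and 0x11 the two high qwords
// (the combinations commonly used for CRC folding).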
7627 
7628 void Assembler::vzeroupper() {
7629   if (VM_Version::supports_vzeroupper()) {
7630     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7631     (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
7632     emit_int8(0x77);
7633   }
7634 }
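// vzeroupper itself always fits the 2-byte VEX form: with map 0F, W = 0 and no
// register bits set, the prefix emitter produces just c5 f8, followed by opcode 77.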
7635 
7636 #ifndef _LP64
7637 // 32bit only pieces of the assembler
7638 
7639 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
7640   // NO PREFIX AS NEVER 64BIT
7641   InstructionMark im(this);
7642   emit_int8((unsigned char)0x81);
7643   emit_int8((unsigned char)(0xF8 | src1->encoding()));
7644   emit_data(imm32, rspec, 0);
7645 }
7646 
7647 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
7649   InstructionMark im(this);
7650   emit_int8((unsigned char)0x81);
7651   emit_operand(rdi, src1);
7652   emit_data(imm32, rspec, 0);
7653 }
7654 
// The 64-bit cmpxchg (cmpxchg8b, 32-bit platforms only) compares the value at adr with
// the contents of rdx:rax, and stores rcx:rbx into adr if they are equal; otherwise,
// the value at adr is loaded into rdx:rax.  The ZF is set if the compared values were
// equal, and cleared otherwise.
7658 void Assembler::cmpxchg8(Address adr) {
7659   InstructionMark im(this);
7660   emit_int8(0x0F);
7661   emit_int8((unsigned char)0xC7);
7662   emit_operand(rcx, adr);
7663 }
7664 
7665 void Assembler::decl(Register dst) {
7666   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_int8(0x48 | dst->encoding());
7668 }
7669 
7670 #endif // _LP64
7671 
7672 // 64bit typically doesn't use the x87 but needs to for the trig funcs
7673 
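// Most register-stack forms below share one shape: emit_farith(b1, b2, i) emits the
// opcode byte b1 followed by b2 + i, where i is the x87 stack index.  For example,
// fadd(3) emits d8 c3, i.e. FADD ST(0), ST(3).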
7674 void Assembler::fabs() {
7675   emit_int8((unsigned char)0xD9);
7676   emit_int8((unsigned char)0xE1);
7677 }
7678 
7679 void Assembler::fadd(int i) {
7680   emit_farith(0xD8, 0xC0, i);
7681 }
7682 
7683 void Assembler::fadd_d(Address src) {
7684   InstructionMark im(this);
7685   emit_int8((unsigned char)0xDC);
7686   emit_operand32(rax, src);
7687 }
7688 
7689 void Assembler::fadd_s(Address src) {
7690   InstructionMark im(this);
7691   emit_int8((unsigned char)0xD8);
7692   emit_operand32(rax, src);
7693 }
7694 
7695 void Assembler::fadda(int i) {
7696   emit_farith(0xDC, 0xC0, i);
7697 }
7698 
7699 void Assembler::faddp(int i) {
7700   emit_farith(0xDE, 0xC0, i);
7701 }
7702 
7703 void Assembler::fchs() {
7704   emit_int8((unsigned char)0xD9);
7705   emit_int8((unsigned char)0xE0);
7706 }
7707 
7708 void Assembler::fcom(int i) {
7709   emit_farith(0xD8, 0xD0, i);
7710 }
7711 
7712 void Assembler::fcomp(int i) {
7713   emit_farith(0xD8, 0xD8, i);
7714 }
7715 
7716 void Assembler::fcomp_d(Address src) {
7717   InstructionMark im(this);
7718   emit_int8((unsigned char)0xDC);
7719   emit_operand32(rbx, src);
7720 }
7721 
7722 void Assembler::fcomp_s(Address src) {
7723   InstructionMark im(this);
7724   emit_int8((unsigned char)0xD8);
7725   emit_operand32(rbx, src);
7726 }
7727 
7728 void Assembler::fcompp() {
7729   emit_int8((unsigned char)0xDE);
7730   emit_int8((unsigned char)0xD9);
7731 }
7732 
7733 void Assembler::fcos() {
7734   emit_int8((unsigned char)0xD9);
7735   emit_int8((unsigned char)0xFF);
7736 }
7737 
7738 void Assembler::fdecstp() {
7739   emit_int8((unsigned char)0xD9);
7740   emit_int8((unsigned char)0xF6);
7741 }
7742 
7743 void Assembler::fdiv(int i) {
7744   emit_farith(0xD8, 0xF0, i);
7745 }
7746 
7747 void Assembler::fdiv_d(Address src) {
7748   InstructionMark im(this);
7749   emit_int8((unsigned char)0xDC);
7750   emit_operand32(rsi, src);
7751 }
7752 
7753 void Assembler::fdiv_s(Address src) {
7754   InstructionMark im(this);
7755   emit_int8((unsigned char)0xD8);
7756   emit_operand32(rsi, src);
7757 }
7758 
7759 void Assembler::fdiva(int i) {
7760   emit_farith(0xDC, 0xF8, i);
7761 }
7762 
7763 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
7764 //       is erroneous for some of the floating-point instructions below.
7765 
7766 void Assembler::fdivp(int i) {
7767   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
7768 }
7769 
7770 void Assembler::fdivr(int i) {
7771   emit_farith(0xD8, 0xF8, i);
7772 }
7773 
7774 void Assembler::fdivr_d(Address src) {
7775   InstructionMark im(this);
7776   emit_int8((unsigned char)0xDC);
7777   emit_operand32(rdi, src);
7778 }
7779 
7780 void Assembler::fdivr_s(Address src) {
7781   InstructionMark im(this);
7782   emit_int8((unsigned char)0xD8);
7783   emit_operand32(rdi, src);
7784 }
7785 
7786 void Assembler::fdivra(int i) {
7787   emit_farith(0xDC, 0xF0, i);
7788 }
7789 
7790 void Assembler::fdivrp(int i) {
7791   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
7792 }
7793 
7794 void Assembler::ffree(int i) {
7795   emit_farith(0xDD, 0xC0, i);
7796 }
7797 
7798 void Assembler::fild_d(Address adr) {
7799   InstructionMark im(this);
7800   emit_int8((unsigned char)0xDF);
7801   emit_operand32(rbp, adr);
7802 }
7803 
7804 void Assembler::fild_s(Address adr) {
7805   InstructionMark im(this);
7806   emit_int8((unsigned char)0xDB);
7807   emit_operand32(rax, adr);
7808 }
7809 
7810 void Assembler::fincstp() {
7811   emit_int8((unsigned char)0xD9);
7812   emit_int8((unsigned char)0xF7);
7813 }
7814 
7815 void Assembler::finit() {
7816   emit_int8((unsigned char)0x9B);
7817   emit_int8((unsigned char)0xDB);
7818   emit_int8((unsigned char)0xE3);
7819 }
7820 
7821 void Assembler::fist_s(Address adr) {
7822   InstructionMark im(this);
7823   emit_int8((unsigned char)0xDB);
7824   emit_operand32(rdx, adr);
7825 }
7826 
7827 void Assembler::fistp_d(Address adr) {
7828   InstructionMark im(this);
7829   emit_int8((unsigned char)0xDF);
7830   emit_operand32(rdi, adr);
7831 }
7832 
7833 void Assembler::fistp_s(Address adr) {
7834   InstructionMark im(this);
7835   emit_int8((unsigned char)0xDB);
7836   emit_operand32(rbx, adr);
7837 }
7838 
7839 void Assembler::fld1() {
7840   emit_int8((unsigned char)0xD9);
7841   emit_int8((unsigned char)0xE8);
7842 }
7843 
7844 void Assembler::fld_d(Address adr) {
7845   InstructionMark im(this);
7846   emit_int8((unsigned char)0xDD);
7847   emit_operand32(rax, adr);
7848 }
7849 
7850 void Assembler::fld_s(Address adr) {
7851   InstructionMark im(this);
7852   emit_int8((unsigned char)0xD9);
7853   emit_operand32(rax, adr);
7854 }
7855 
7856 
7857 void Assembler::fld_s(int index) {
7858   emit_farith(0xD9, 0xC0, index);
7859 }
7860 
7861 void Assembler::fld_x(Address adr) {
7862   InstructionMark im(this);
7863   emit_int8((unsigned char)0xDB);
7864   emit_operand32(rbp, adr);
7865 }
7866 
7867 void Assembler::fldcw(Address src) {
7868   InstructionMark im(this);
7869   emit_int8((unsigned char)0xD9);
7870   emit_operand32(rbp, src);
7871 }
7872 
7873 void Assembler::fldenv(Address src) {
7874   InstructionMark im(this);
7875   emit_int8((unsigned char)0xD9);
7876   emit_operand32(rsp, src);
7877 }
7878 
7879 void Assembler::fldlg2() {
7880   emit_int8((unsigned char)0xD9);
7881   emit_int8((unsigned char)0xEC);
7882 }
7883 
7884 void Assembler::fldln2() {
7885   emit_int8((unsigned char)0xD9);
7886   emit_int8((unsigned char)0xED);
7887 }
7888 
7889 void Assembler::fldz() {
7890   emit_int8((unsigned char)0xD9);
7891   emit_int8((unsigned char)0xEE);
7892 }
7893 
7894 void Assembler::flog() {
7895   fldln2();
7896   fxch();
7897   fyl2x();
7898 }
7899 
7900 void Assembler::flog10() {
7901   fldlg2();
7902   fxch();
7903   fyl2x();
7904 }
7905 
7906 void Assembler::fmul(int i) {
7907   emit_farith(0xD8, 0xC8, i);
7908 }
7909 
7910 void Assembler::fmul_d(Address src) {
7911   InstructionMark im(this);
7912   emit_int8((unsigned char)0xDC);
7913   emit_operand32(rcx, src);
7914 }
7915 
7916 void Assembler::fmul_s(Address src) {
7917   InstructionMark im(this);
7918   emit_int8((unsigned char)0xD8);
7919   emit_operand32(rcx, src);
7920 }
7921 
7922 void Assembler::fmula(int i) {
7923   emit_farith(0xDC, 0xC8, i);
7924 }
7925 
7926 void Assembler::fmulp(int i) {
7927   emit_farith(0xDE, 0xC8, i);
7928 }
7929 
7930 void Assembler::fnsave(Address dst) {
7931   InstructionMark im(this);
7932   emit_int8((unsigned char)0xDD);
7933   emit_operand32(rsi, dst);
7934 }
7935 
7936 void Assembler::fnstcw(Address src) {
7937   InstructionMark im(this);
7938   emit_int8((unsigned char)0x9B);
7939   emit_int8((unsigned char)0xD9);
7940   emit_operand32(rdi, src);
7941 }
7942 
7943 void Assembler::fnstsw_ax() {
7944   emit_int8((unsigned char)0xDF);
7945   emit_int8((unsigned char)0xE0);
7946 }
7947 
7948 void Assembler::fprem() {
7949   emit_int8((unsigned char)0xD9);
7950   emit_int8((unsigned char)0xF8);
7951 }
7952 
7953 void Assembler::fprem1() {
7954   emit_int8((unsigned char)0xD9);
7955   emit_int8((unsigned char)0xF5);
7956 }
7957 
7958 void Assembler::frstor(Address src) {
7959   InstructionMark im(this);
7960   emit_int8((unsigned char)0xDD);
7961   emit_operand32(rsp, src);
7962 }
7963 
7964 void Assembler::fsin() {
7965   emit_int8((unsigned char)0xD9);
7966   emit_int8((unsigned char)0xFE);
7967 }
7968 
7969 void Assembler::fsqrt() {
7970   emit_int8((unsigned char)0xD9);
7971   emit_int8((unsigned char)0xFA);
7972 }
7973 
7974 void Assembler::fst_d(Address adr) {
7975   InstructionMark im(this);
7976   emit_int8((unsigned char)0xDD);
7977   emit_operand32(rdx, adr);
7978 }
7979 
7980 void Assembler::fst_s(Address adr) {
7981   InstructionMark im(this);
7982   emit_int8((unsigned char)0xD9);
7983   emit_operand32(rdx, adr);
7984 }
7985 
7986 void Assembler::fstp_d(Address adr) {
7987   InstructionMark im(this);
7988   emit_int8((unsigned char)0xDD);
7989   emit_operand32(rbx, adr);
7990 }
7991 
7992 void Assembler::fstp_d(int index) {
7993   emit_farith(0xDD, 0xD8, index);
7994 }
7995 
7996 void Assembler::fstp_s(Address adr) {
7997   InstructionMark im(this);
7998   emit_int8((unsigned char)0xD9);
7999   emit_operand32(rbx, adr);
8000 }
8001 
8002 void Assembler::fstp_x(Address adr) {
8003   InstructionMark im(this);
8004   emit_int8((unsigned char)0xDB);
8005   emit_operand32(rdi, adr);
8006 }
8007 
8008 void Assembler::fsub(int i) {
8009   emit_farith(0xD8, 0xE0, i);
8010 }
8011 
8012 void Assembler::fsub_d(Address src) {
8013   InstructionMark im(this);
8014   emit_int8((unsigned char)0xDC);
8015   emit_operand32(rsp, src);
8016 }
8017 
8018 void Assembler::fsub_s(Address src) {
8019   InstructionMark im(this);
8020   emit_int8((unsigned char)0xD8);
8021   emit_operand32(rsp, src);
8022 }
8023 
8024 void Assembler::fsuba(int i) {
8025   emit_farith(0xDC, 0xE8, i);
8026 }
8027 
8028 void Assembler::fsubp(int i) {
8029   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
8030 }
8031 
8032 void Assembler::fsubr(int i) {
8033   emit_farith(0xD8, 0xE8, i);
8034 }
8035 
8036 void Assembler::fsubr_d(Address src) {
8037   InstructionMark im(this);
8038   emit_int8((unsigned char)0xDC);
8039   emit_operand32(rbp, src);
8040 }
8041 
8042 void Assembler::fsubr_s(Address src) {
8043   InstructionMark im(this);
8044   emit_int8((unsigned char)0xD8);
8045   emit_operand32(rbp, src);
8046 }
8047 
8048 void Assembler::fsubra(int i) {
8049   emit_farith(0xDC, 0xE0, i);
8050 }
8051 
8052 void Assembler::fsubrp(int i) {
8053   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
8054 }
8055 
8056 void Assembler::ftan() {
8057   emit_int8((unsigned char)0xD9);
8058   emit_int8((unsigned char)0xF2);
8059   emit_int8((unsigned char)0xDD);
8060   emit_int8((unsigned char)0xD8);
8061 }
8062 
8063 void Assembler::ftst() {
8064   emit_int8((unsigned char)0xD9);
8065   emit_int8((unsigned char)0xE4);
8066 }
8067 
8068 void Assembler::fucomi(int i) {
8069   // make sure the instruction is supported (introduced for P6, together with cmov)
8070   guarantee(VM_Version::supports_cmov(), "illegal instruction");
8071   emit_farith(0xDB, 0xE8, i);
8072 }
8073 
8074 void Assembler::fucomip(int i) {
8075   // make sure the instruction is supported (introduced for P6, together with cmov)
8076   guarantee(VM_Version::supports_cmov(), "illegal instruction");
8077   emit_farith(0xDF, 0xE8, i);
8078 }
8079 
8080 void Assembler::fwait() {
8081   emit_int8((unsigned char)0x9B);
8082 }
8083 
8084 void Assembler::fxch(int i) {
8085   emit_farith(0xD9, 0xC8, i);
8086 }
8087 
8088 void Assembler::fyl2x() {
8089   emit_int8((unsigned char)0xD9);
8090   emit_int8((unsigned char)0xF1);
8091 }
8092 
8093 void Assembler::frndint() {
8094   emit_int8((unsigned char)0xD9);
8095   emit_int8((unsigned char)0xFC);
8096 }
8097 
8098 void Assembler::f2xm1() {
8099   emit_int8((unsigned char)0xD9);
8100   emit_int8((unsigned char)0xF0);
8101 }
8102 
8103 void Assembler::fldl2e() {
8104   emit_int8((unsigned char)0xD9);
8105   emit_int8((unsigned char)0xEA);
8106 }
8107 
8108 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
8109 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
8110 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
8111 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
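// Example: a legacy-encoded SSE4.1 instruction such as pblendvb (66 0F 38 10 /r)
// reaches the emitters below with pre = VEX_SIMD_66 and opc = VEX_OPCODE_0F_38;
// simd_pre[] supplies the 0x66 prefix and simd_opc[] the 0x38 escape byte that
// follows the mandatory 0x0F.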
8112 
8113 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
8114 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
8115   if (pre > 0) {
8116     emit_int8(simd_pre[pre]);
8117   }
8118   if (rex_w) {
8119     prefixq(adr, xreg);
8120   } else {
8121     prefix(adr, xreg);
8122   }
8123   if (opc > 0) {
8124     emit_int8(0x0F);
8125     int opc2 = simd_opc[opc];
8126     if (opc2 > 0) {
8127       emit_int8(opc2);
8128     }
8129   }
8130 }
8131 
8132 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
8133   if (pre > 0) {
8134     emit_int8(simd_pre[pre]);
8135   }
8136   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
8137   if (opc > 0) {
8138     emit_int8(0x0F);
8139     int opc2 = simd_opc[opc];
8140     if (opc2 > 0) {
8141       emit_int8(opc2);
8142     }
8143   }
8144   return encode;
8145 }
8146 
8147 
8148 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
8149   int vector_len = _attributes->get_vector_len();
8150   bool vex_w = _attributes->is_rex_vex_w();
8151   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
8152     prefix(VEX_3bytes);
8153 
8154     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
8155     byte1 = (~byte1) & 0xE0;
8156     byte1 |= opc;
8157     emit_int8(byte1);
8158 
8159     int byte2 = ((~nds_enc) & 0xf) << 3;
8160     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
8161     emit_int8(byte2);
8162   } else {
8163     prefix(VEX_2bytes);
8164 
8165     int byte1 = vex_r ? VEX_R : 0;
8166     byte1 = (~byte1) & 0x80;
8167     byte1 |= ((~nds_enc) & 0xf) << 3;
    byte1 |= ((vector_len > 0) ? 4 : 0) | pre;
8169     emit_int8(byte1);
8170   }
8171 }
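// The 2-byte (c5) form can express only R, vvvv, L and pp, so anything needing B, X,
// W or an opcode map other than 0F must use the 3-byte (c4) form handled first above.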
8172 
// This is a 4-byte encoding
8174 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
8175   // EVEX 0x62 prefix
8176   prefix(EVEX_4bytes);
8177   bool vex_w = _attributes->is_rex_vex_w();
8178   int evex_encoding = (vex_w ? VEX_W : 0);
8179   // EVEX.b is not currently used for broadcast of single element or data rounding modes
8180   _attributes->set_evex_encoding(evex_encoding);
8181 
  // P0: byte 2, laid out as RXBR`00mm; the R/X/B/R` bits are built in positive form
  // below and then complemented, since the prefix stores them inverted
8184   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
8185   byte2 = (~byte2) & 0xF0;
8186   // confine opc opcode extensions in mm bits to lower two bits
8187   // of form {0F, 0F_38, 0F_3A}
8188   byte2 |= opc;
8189   emit_int8(byte2);
8190 
8191   // P1: byte 3 as Wvvvv1pp
8192   int byte3 = ((~nds_enc) & 0xf) << 3;
8193   // p[10] is always 1
8194   byte3 |= EVEX_F;
8195   byte3 |= (vex_w & 1) << 7;
8196   // confine pre opcode extensions in pp bits to lower two bits
8197   // of form {66, F3, F2}
8198   byte3 |= pre;
8199   emit_int8(byte3);
8200 
8201   // P2: byte 4 as zL'Lbv'aaa
  // opmask registers (kregs) are encoded in the low 3 bits as aaa; the default specifier 0 selects k0, i.e. no masking
8203   int byte4 = (_attributes->is_no_reg_mask()) ?
8204               0 :
8205               _attributes->get_embedded_opmask_register_specifier();
8206   // EVEX.v` for extending EVEX.vvvv or VIDX
8207   byte4 |= (evex_v ? 0: EVEX_V);
  // third is EVEX.b for broadcast actions
8209   byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
8210   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
8211   byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
8212   // last is EVEX.z for zero/merge actions
8213   if (_attributes->is_no_reg_mask() == false) {
8214     byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
8215   }
8216   emit_int8(byte4);
8217 }
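// Worked example (assuming AVX512BW, so the EVEX path is chosen): evpbroadcastb(zmm0,
// xmm1, AVX_512bit) reaches here and emits the prefix 62 f2 7d 48 -- P0 = f2 (inverted
// R/X/B/R` with mm = 0F_38), P1 = 7d (W = 0, vvvv = ~0, the mandatory 1 bit, pp = 66),
// P2 = 48 (L'L = 2 for 512 bits, inverted V`, aaa = 0) -- then opcode 78 and ModRM c1.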
8218 
8219 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
  bool vex_r = (xreg_enc & 8) == 8;
8221   bool vex_b = adr.base_needs_rex();
8222   bool vex_x = adr.index_needs_rex();
8223   set_attributes(attributes);
8224   attributes->set_current_assembler(this);
8225 
  // if vector-length encodings are turned off (no AVX512VL), revert to AVX for vectors smaller than 512-bit
8227   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
8228     switch (attributes->get_vector_len()) {
8229     case AVX_128bit:
8230     case AVX_256bit:
8231       attributes->set_is_legacy_mode();
8232       break;
8233     }
8234   }
8235 
  // Unless this instruction is marked pure EVEX, check whether it may instead be
  // encoded in legacy (VEX) mode, i.e. whether all of its register operands fit in
  // the lower bank.  Pure EVEX instructions call set_is_evex_instruction() in their
  // definition; for the rest that field is set below once we commit to EVEX.
8240   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
8241       !_is_managed && !attributes->is_evex_instruction()) {
8242     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
8243       bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
8244       if (check_register_bank) {
8245         // check nds_enc and xreg_enc for upper bank usage
8246         if (nds_enc < 16 && xreg_enc < 16) {
8247           attributes->set_is_legacy_mode();
8248         }
8249       } else {
8250         attributes->set_is_legacy_mode();
8251       }
8252     }
8253   }
8254 
8255   _is_managed = false;
8256   if (UseAVX > 2 && !attributes->is_legacy_mode())
8257   {
8258     bool evex_r = (xreg_enc >= 16);
8259     bool evex_v = (nds_enc >= 16);
8260     attributes->set_is_evex_instruction();
8261     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
8262   } else {
8263     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
8264       attributes->set_rex_vex_w(false);
8265     }
8266     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
8267   }
8268 }
8269 
8270 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
  bool vex_r = (dst_enc & 8) == 8;
  bool vex_b = (src_enc & 8) == 8;
8273   bool vex_x = false;
8274   set_attributes(attributes);
8275   attributes->set_current_assembler(this);
8276   bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
8277 
  // if vector-length encodings are turned off (no AVX512VL), revert to AVX for vectors smaller than 512-bit
8279   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
8280     switch (attributes->get_vector_len()) {
8281     case AVX_128bit:
8282     case AVX_256bit:
8283       if (check_register_bank) {
8284         if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
8285           // up propagate arithmetic instructions to meet RA requirements
8286           attributes->set_vector_len(AVX_512bit);
8287         } else {
8288           attributes->set_is_legacy_mode();
8289         }
8290       } else {
8291         attributes->set_is_legacy_mode();
8292       }
8293       break;
8294     }
8295   }
8296 
  // Unless this instruction is marked pure EVEX, check whether it may instead be
  // encoded in legacy (VEX) mode, i.e. whether all of its register operands fit in
  // the lower bank.  Pure EVEX instructions call set_is_evex_instruction() in their
  // definition; for the rest that field is set below once we commit to EVEX.
8301   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
8302       !_is_managed && !attributes->is_evex_instruction()) {
8303     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
8304       if (check_register_bank) {
8305         // check dst_enc, nds_enc and src_enc for upper bank usage
8306         if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
8307           attributes->set_is_legacy_mode();
8308         }
8309       } else {
8310         attributes->set_is_legacy_mode();
8311       }
8312     }
8313   }
8314 
8315   _is_managed = false;
8316   if (UseAVX > 2 && !attributes->is_legacy_mode())
8317   {
8318     bool evex_r = (dst_enc >= 16);
8319     bool evex_v = (nds_enc >= 16);
8320     // can use vex_x as bank extender on rm encoding
8321     vex_x = (src_enc >= 16);
8322     attributes->set_is_evex_instruction();
8323     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
8324   } else {
8325     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
8326       attributes->set_rex_vex_w(false);
8327     }
8328     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
8329   }
8330 
8331   // return modrm byte components for operands
8332   return (((dst_enc & 7) << 3) | (src_enc & 7));
8333 }
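// Callers complete the instruction by or'ing the returned reg/rm bits into 0xC0,
// i.e. a register-direct ModRM byte -- the emit_int8((unsigned char)(0xC0 | encode))
// pattern used throughout this file.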
8334 
8335 
8336 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
8337                             VexOpcode opc, InstructionAttr *attributes) {
8338   if (UseAVX > 0) {
8339     int xreg_enc = xreg->encoding();
8340     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
8341     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
8342   } else {
8343     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
8344     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
8345   }
8346 }
8347 
8348 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
8349                                       VexOpcode opc, InstructionAttr *attributes) {
8350   int dst_enc = dst->encoding();
8351   int src_enc = src->encoding();
8352   if (UseAVX > 0) {
8353     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
8354     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
8355   } else {
8356     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
8357     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
8358   }
8359 }
8360 
8361 void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
8362   assert(VM_Version::supports_avx(), "");
8363   assert(!VM_Version::supports_evex(), "");
8364   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8365   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8366   emit_int8((unsigned char)0xC2);
8367   emit_int8((unsigned char)(0xC0 | encode));
8368   emit_int8((unsigned char)(0x1F & cop));
8369 }
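// The trailing byte of vcmppd above (and vcmpps further below) is the AVX 5-bit
// comparison predicate (0x00 = EQ_OQ, 0x01 = LT_OS, 0x02 = LE_OS, 0x03 = UNORD_Q,
// 0x04 = NEQ_UQ, ...), which is why cop is masked with 0x1F.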
8370 
8371 void Assembler::vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
8372   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
8373   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8374   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8375   emit_int8((unsigned char)0x4B);
8376   emit_int8((unsigned char)(0xC0 | encode));
8377   int src2_enc = src2->encoding();
8378   emit_int8((unsigned char)(0xF0 & src2_enc<<4));
8379 }
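// vblendvpd (and vblendvps below) use the VEX "is4" operand convention: the fourth
// register is encoded not in ModRM but in the upper four bits of the trailing
// immediate, hence the << 4 on its encoding.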
8380 
8381 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
8382   assert(VM_Version::supports_avx2(), "");
8383   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8384   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8385   emit_int8((unsigned char)0x02);
8386   emit_int8((unsigned char)(0xC0 | encode));
8387   emit_int8((unsigned char)imm8);
8388 }
8389 
8390 void Assembler::vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len) {
8391   assert(VM_Version::supports_avx(), "");
8392   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8393   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
8394   emit_int8((unsigned char)0xC2);
8395   emit_int8((unsigned char)(0xC0 | encode));
8396   emit_int8((unsigned char)comparison);
8397 }
8398 
8399 void Assembler::evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8400                         ComparisonPredicateFP comparison, int vector_len) {
8401   assert(VM_Version::supports_evex(), "");
8402   // Encoding: EVEX.NDS.XXX.0F.W0 C2 /r ib
8403   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8404   attributes.set_is_evex_instruction();
8405   attributes.set_embedded_opmask_register_specifier(mask);
8406   attributes.reset_is_clear_context();
8407   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
8408   emit_int8((unsigned char)0xC2);
8409   emit_int8((unsigned char)(0xC0 | encode));
8410   emit_int8((unsigned char)comparison);
8411 }
8412 
8413 void Assembler::evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8414                         ComparisonPredicateFP comparison, int vector_len) {
8415   assert(VM_Version::supports_evex(), "");
8416   // Encoding: EVEX.NDS.XXX.66.0F.W1 C2 /r ib
8417   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8418   attributes.set_is_evex_instruction();
8419   attributes.set_embedded_opmask_register_specifier(mask);
8420   attributes.reset_is_clear_context();
8421   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8422   emit_int8((unsigned char)0xC2);
8423   emit_int8((unsigned char)(0xC0 | encode));
8424   emit_int8((unsigned char)comparison);
8425 }
8426 
8427 void Assembler::blendvps(XMMRegister dst, XMMRegister src) {
8428   assert(VM_Version::supports_sse4_1(), "");
8429   assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
8430   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8431   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8432   emit_int8(0x14);
8433   emit_int8((unsigned char)(0xC0 | encode));
8434 }
8435 
8436 void Assembler::blendvpd(XMMRegister dst, XMMRegister src) {
8437   assert(VM_Version::supports_sse4_1(), "");
8438   assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
8439   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8440   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8441   emit_int8(0x15);
8442   emit_int8((unsigned char)(0xC0 | encode));
8443 }
8444 
8445 void Assembler::pblendvb(XMMRegister dst, XMMRegister src) {
8446   assert(VM_Version::supports_sse4_1(), "");
8447   assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
8448   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8449   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8450   emit_int8(0x10);
8451   emit_int8((unsigned char)(0xC0 | encode));
8452 }
8453 
8454 void Assembler::vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) {
8455   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
8456   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8457   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8458   emit_int8((unsigned char)0x4A);
8459   emit_int8((unsigned char)(0xC0 | encode));
8460   int mask_enc = mask->encoding();
8461   emit_int8((unsigned char)(0xF0 & (mask_enc << 4)));
8462 }
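
// The trailing byte emitted above is the VEX /is4 immediate: four-operand AVX
// instructions such as vblendvps carry their fourth (mask) register in bits
// 7:4 of an imm8 that follows the ModRM byte, which is why the mask encoding
// is shifted left by four and masked with 0xF0 before being emitted. The low
// four bits of the immediate are left zero.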
8463 
8464 void Assembler::vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8465   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8466   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8467   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8468   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8469   emit_int8((unsigned char)0x64);
8470   emit_int8((unsigned char)(0xC0 | encode));
8471 }
8472 
8473 void Assembler::vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8474   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8475   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8476   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8477   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8478   emit_int8((unsigned char)0x65);
8479   emit_int8((unsigned char)(0xC0 | encode));
8480 }
8481 
8482 void Assembler::vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8483   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8484   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8485   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8486   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8487   emit_int8((unsigned char)0x66);
8488   emit_int8((unsigned char)(0xC0 | encode));
8489 }
8490 
8491 void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8492   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8493   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8494   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8495   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8496   emit_int8((unsigned char)0x37);
8497   emit_int8((unsigned char)(0xC0 | encode));
8498 }
8499 
8500 void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8501                         int comparison, int vector_len) {
8502   assert(VM_Version::supports_evex(), "");
8503   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8504   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
8505   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8506   attributes.set_is_evex_instruction();
8507   attributes.set_embedded_opmask_register_specifier(mask);
8508   attributes.reset_is_clear_context();
8509   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8510   emit_int8((unsigned char)0x1F);
8511   emit_int8((unsigned char)(0xC0 | encode));
8512   emit_int8((unsigned char)comparison);
8513 }
8514 
8515 void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8516                         int comparison, int vector_len) {
8517   assert(VM_Version::supports_evex(), "");
8518   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8519   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
8520   InstructionMark im(this);
8521   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8522   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
8523   attributes.set_is_evex_instruction();
8524   attributes.set_embedded_opmask_register_specifier(mask);
8525   attributes.reset_is_clear_context();
8526   int dst_enc = kdst->encoding();
8527   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8528   emit_int8((unsigned char)0x1F);
8529   emit_operand(as_Register(dst_enc), src);
8530   emit_int8((unsigned char)comparison);
8531 }
8532 
8533 void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8534                         int comparison, int vector_len) {
8535   assert(VM_Version::supports_evex(), "");
8536   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8537   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
8538   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8539   attributes.set_is_evex_instruction();
8540   attributes.set_embedded_opmask_register_specifier(mask);
8541   attributes.reset_is_clear_context();
8542   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8543   emit_int8((unsigned char)0x1F);
8544   emit_int8((unsigned char)(0xC0 | encode));
8545   emit_int8((unsigned char)comparison);
8546 }
8547 
8548 void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8549                         int comparison, int vector_len) {
8550   assert(VM_Version::supports_evex(), "");
8551   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8552   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
8553   InstructionMark im(this);
8554   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8555   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
8556   attributes.set_is_evex_instruction();
8557   attributes.set_embedded_opmask_register_specifier(mask);
8558   attributes.reset_is_clear_context();
8559   int dst_enc = kdst->encoding();
8560   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8561   emit_int8((unsigned char)0x1F);
8562   emit_operand(as_Register(dst_enc), src);
8563   emit_int8((unsigned char)comparison);
8564 }
8565 
8566 void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8567                         int comparison, int vector_len) {
8568   assert(VM_Version::supports_evex(), "");
8569   assert(VM_Version::supports_avx512bw(), "");
8570   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8571   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
8572   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8573   attributes.set_is_evex_instruction();
8574   attributes.set_embedded_opmask_register_specifier(mask);
8575   attributes.reset_is_clear_context();
8576   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8577   emit_int8((unsigned char)0x3F);
8578   emit_int8((unsigned char)(0xC0 | encode));
8579   emit_int8((unsigned char)comparison);
8580 }
8581 
8582 void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8583                         int comparison, int vector_len) {
8584   assert(VM_Version::supports_evex(), "");
8585   assert(VM_Version::supports_avx512bw(), "");
8586   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8587   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
8588   InstructionMark im(this);
8589   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8590   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
8591   attributes.set_is_evex_instruction();
8592   attributes.set_embedded_opmask_register_specifier(mask);
8593   attributes.reset_is_clear_context();
8594   int dst_enc = kdst->encoding();
8595   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8596   emit_int8((unsigned char)0x3F);
8597   emit_operand(as_Register(dst_enc), src);
8598   emit_int8((unsigned char)comparison);
8599 }
8600 
8601 void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8602                         int comparison, int vector_len) {
8603   assert(VM_Version::supports_evex(), "");
8604   assert(VM_Version::supports_avx512bw(), "");
8605   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8606   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
8607   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8608   attributes.set_is_evex_instruction();
8609   attributes.set_embedded_opmask_register_specifier(mask);
8610   attributes.reset_is_clear_context();
8611   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8612   emit_int8((unsigned char)0x3F);
8613   emit_int8((unsigned char)(0xC0 | encode));
8614   emit_int8((unsigned char)comparison);
8615 }
8616 
8617 void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8618                         int comparison, int vector_len) {
8619   assert(VM_Version::supports_evex(), "");
8620   assert(VM_Version::supports_avx512bw(), "");
8621   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8622   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
8623   InstructionMark im(this);
8624   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8625   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
8626   attributes.set_is_evex_instruction();
8627   attributes.set_embedded_opmask_register_specifier(mask);
8628   attributes.reset_is_clear_context();
8629   int dst_enc = kdst->encoding();
8630   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8631   emit_int8((unsigned char)0x3F);
8632   emit_operand(as_Register(dst_enc), src);
8633   emit_int8((unsigned char)comparison);
8634 }
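
// The evpcmp{b,w,d,q} emitters above all use the EVEX compare-with-predicate
// encoding: opcode 0x1F for dword/qword and 0x3F for byte/word, with the
// element width selected by EVEX.W and the predicate carried in a trailing
// imm8 emitted after the ModRM/operand bytes. The asserted predicate range
// (Assembler::eq .. Assembler::_true) maps directly onto that imm8.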
8635 
8636 void Assembler::vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) {
8637   assert(VM_Version::supports_avx(), "");
8638   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8639   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8640   emit_int8((unsigned char)0x4C);
8641   emit_int8((unsigned char)(0xC0 | encode));
8642   int mask_enc = mask->encoding();
8643   emit_int8((unsigned char)(0xF0 & (mask_enc << 4)));
8644 }
8645 
8646 void Assembler::evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8647   assert(VM_Version::supports_evex(), "");
8648   // Encoding: EVEX.NDS.XXX.66.0F38.W1 65 /r
8649   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8650   attributes.set_is_evex_instruction();
8651   attributes.set_embedded_opmask_register_specifier(mask);
8652   if (merge) {
8653     attributes.reset_is_clear_context();
8654   }
8655   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8656   emit_int8((unsigned char)0x65);
8657   emit_int8((unsigned char)(0xC0 | encode));
8658 }
8659 
8660 void Assembler::evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8661   assert(VM_Version::supports_evex(), "");
8662   // Encoding: EVEX.NDS.XXX.66.0F38.W0 65 /r
8663   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8664   attributes.set_is_evex_instruction();
8665   attributes.set_embedded_opmask_register_specifier(mask);
8666   if (merge) {
8667     attributes.reset_is_clear_context();
8668   }
8669   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8670   emit_int8((unsigned char)0x65);
8671   emit_int8((unsigned char)(0xC0 | encode));
8672 }
8673 
8674 void Assembler::evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8675   assert(VM_Version::supports_evex(), "");
8676   assert(VM_Version::supports_avx512bw(), "");
8677   // Encoding: EVEX.NDS.512.66.0F38.W0 66 /r
8678   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8679   attributes.set_is_evex_instruction();
8680   attributes.set_embedded_opmask_register_specifier(mask);
8681   if (merge) {
8682     attributes.reset_is_clear_context();
8683   }
8684   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8685   emit_int8((unsigned char)0x66);
8686   emit_int8((unsigned char)(0xC0 | encode));
8687 }
8688 
8689 void Assembler::evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8690   assert(VM_Version::supports_evex(), "");
8691   assert(VM_Version::supports_avx512bw(), "");
8692   // Encoding: EVEX.NDS.512.66.0F38.W1 66 /r
8693   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8694   attributes.set_is_evex_instruction();
8695   attributes.set_embedded_opmask_register_specifier(mask);
8696   if (merge) {
8697     attributes.reset_is_clear_context();
8698   }
8699   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8700   emit_int8((unsigned char)0x66);
8701   emit_int8((unsigned char)(0xC0 | encode));
8702 }
8703 
8704 void Assembler::evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8705   assert(VM_Version::supports_evex(), "");
8706   // Encoding: EVEX.NDS.512.66.0F38.W0 64 /r
8707   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8708   attributes.set_is_evex_instruction();
8709   attributes.set_embedded_opmask_register_specifier(mask);
8710   if (merge) {
8711     attributes.reset_is_clear_context();
8712   }
8713   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8714   emit_int8((unsigned char)0x64);
8715   emit_int8((unsigned char)(0xC0 | encode));
8716 }
8717 
8718 void Assembler::evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8719   assert(VM_Version::supports_evex(), "");
8720   // Encoding: EVEX.NDS.512.66.0F38.W1 64 /r
8721   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8722   attributes.set_is_evex_instruction();
8723   attributes.set_embedded_opmask_register_specifier(mask);
8724   if (merge) {
8725     attributes.reset_is_clear_context();
8726   }
8727   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8728   emit_int8((unsigned char)0x64);
8729   emit_int8((unsigned char)(0xC0 | encode));
8730 }
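
// For the evblendm/evpblendm group above, 'merge' selects the masking policy:
// calling reset_is_clear_context() requests merge-masking, so lanes switched
// off by 'mask' keep their previous contents in dst, while the default
// (clear-context) attribute gives zeroing-masking, where switched-off lanes
// are zeroed. This reading assumes the attribute's clear-context flag maps
// onto the EVEX.z bit, which is how these emitters are wired up.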
8731 
8732 void Assembler::shlxl(Register dst, Register src1, Register src2) {
8733   assert(VM_Version::supports_bmi2(), "");
8734   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8735   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8736   emit_int8((unsigned char)0xF7);
8737   emit_int8((unsigned char)(0xC0 | encode));
8738 }
8739 
8740 void Assembler::shlxq(Register dst, Register src1, Register src2) {
8741   assert(VM_Version::supports_bmi2(), "");
8742   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8743   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8744   emit_int8((unsigned char)0xF7);
8745   emit_int8((unsigned char)(0xC0 | encode));
8746 }
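
// shlx (BMI2) shifts left without touching RFLAGS and takes its count from a
// general register rather than cl. Note the argument order passed to
// vex_prefix_and_encode: the count (src2) travels in the VEX.vvvv field, so
// it occupies the 'nds' position, while src1 is the ModRM r/m operand.
// For example, shlxq(rax, rbx, rcx) computes rax = rbx << (rcx & 63) and
// leaves the flags unchanged.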
8747 
8748 #ifndef _LP64
8749 
8750 void Assembler::incl(Register dst) {
8751   // Don't use it directly. Use MacroAssembler::incrementl() instead.
8752   emit_int8(0x40 | dst->encoding());
8753 }
8754 
8755 void Assembler::lea(Register dst, Address src) {
8756   leal(dst, src);
8757 }
8758 
8759 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
8760   InstructionMark im(this);
8761   emit_int8((unsigned char)0xC7);
8762   emit_operand(rax, dst);
8763   emit_data((int)imm32, rspec, 0);
8764 }
8765 
8766 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
8767   InstructionMark im(this);
8768   int encode = prefix_and_encode(dst->encoding());
8769   emit_int8((unsigned char)(0xB8 | encode));
8770   emit_data((int)imm32, rspec, 0);
8771 }
8772 
8773 void Assembler::popa() { // 32bit
8774   emit_int8(0x61);
8775 }
8776 
8777 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
8778   InstructionMark im(this);
8779   emit_int8(0x68);
8780   emit_data(imm32, rspec, 0);
8781 }
8782 
8783 void Assembler::pusha() { // 32bit
8784   emit_int8(0x60);
8785 }
8786 
8787 void Assembler::set_byte_if_not_zero(Register dst) {
8788   emit_int8(0x0F);
8789   emit_int8((unsigned char)0x95);
8790   emit_int8((unsigned char)(0xE0 | dst->encoding()));
8791 }
8792 
8793 void Assembler::shldl(Register dst, Register src) {
8794   emit_int8(0x0F);
8795   emit_int8((unsigned char)0xA5);
8796   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
8797 }
8798 
8799 // 0F A4 / r ib
8800 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
8801   emit_int8(0x0F);
8802   emit_int8((unsigned char)0xA4);
8803   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
8804   emit_int8(imm8);
8805 }
8806 
8807 void Assembler::shrdl(Register dst, Register src) {
8808   emit_int8(0x0F);
8809   emit_int8((unsigned char)0xAD);
8810   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
8811 }
8812 
8813 #else // LP64
8814 
8815 void Assembler::set_byte_if_not_zero(Register dst) {
8816   int enc = prefix_and_encode(dst->encoding(), true);
8817   emit_int8(0x0F);
8818   emit_int8((unsigned char)0x95);
8819   emit_int8((unsigned char)(0xE0 | enc));
8820 }
8821 
8822 // 64bit only pieces of the assembler
8823 // This should only be used by 64bit instructions that can use rip-relative
8824 // addressing; it cannot be used by instructions that want an immediate value.
8825 
8826 bool Assembler::reachable(AddressLiteral adr) {
8827   int64_t disp;
8828   // A reloc of 'none' will force a 64bit literal to the code stream. It is
8829   // likely a placeholder for something that will be patched later, and we need
8830   // to be certain it will always be reachable.
8831   if (adr.reloc() == relocInfo::none) {
8832     return false;
8833   }
8834   if (adr.reloc() == relocInfo::internal_word_type) {
8835     // This should be rip relative and easily reachable.
8836     return true;
8837   }
8838   if (adr.reloc() == relocInfo::virtual_call_type ||
8839       adr.reloc() == relocInfo::opt_virtual_call_type ||
8840       adr.reloc() == relocInfo::static_call_type ||
8841       adr.reloc() == relocInfo::static_stub_type ) {
8842     // This should be rip relative within the code cache and easily
8843     // reachable until we get huge code caches. (At which point
8844     // ic code is going to have issues).
8845     return true;
8846   }
8847   if (adr.reloc() != relocInfo::external_word_type &&
8848       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
8849       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
8850       adr.reloc() != relocInfo::runtime_call_type ) {
8851     return false;
8852   }
8853 
8854   // Stress the correction code
8855   if (ForceUnreachable) {
8856     // Must be a runtime_call reloc; see if it is in the codecache.
8857     // Flipping code in the codecache to be unreachable causes issues
8858     // with things like inline caches where the additional instructions
8859     // are not handled.
8860     if (CodeCache::find_blob(adr._target) == NULL) {
8861       return false;
8862     }
8863   }
8864   // For external_word_type/runtime_call_type, if the target is reachable both
8865   // from where we are now (possibly a temp buffer) and from anywhere we might
8866   // end up in the codeCache, then we are always reachable.
8867   // This would have to change to be more pessimistic if we ever save/restore
8868   // shared code.
8869   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
8870   if (!is_simm32(disp)) return false;
8871   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
8872   if (!is_simm32(disp)) return false;
8873 
8874   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
8875 
8876   // Because a rip-relative operand is a disp plus the address of the next
8877   // instruction, and we don't know that address yet, we apply a fudge factor to
8878   // make sure we will be ok no matter the size of the instruction this ends up in.
8879   // We don't have to fudge the checks above here because they are already worst case.
8880 
8881   // 12 == override/rex byte, opcode byte, modrm byte, sib byte, a 4-byte disp, 4-byte literal
8882   // + 4 because better safe than sorry.
8883   const int fudge = 12 + 4;
8884   if (disp < 0) {
8885     disp -= fudge;
8886   } else {
8887     disp += fudge;
8888   }
8889   return is_simm32(disp);
8890 }
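
// Worked example with illustrative numbers: if CodeCache::low_bound() is
// 0x7f0000000000 and adr._target is 0x7f0100000000 (4GB above it), the first
// displacement computed above is 0xFFFFFFFC, which fails is_simm32, so the
// target is rejected before the fudge factor even matters; rip-relative
// addressing only spans +/- 2GB around the next instruction's address.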
8891 
8892 // Returns true if the polling page is not reachable from the code cache using
8893 // rip-relative addressing.
8894 bool Assembler::is_polling_page_far() {
8895   intptr_t addr = (intptr_t)os::get_polling_page();
8896   return ForceUnreachable ||
8897          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
8898          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
8899 }
8900 
8901 void Assembler::emit_data64(jlong data,
8902                             relocInfo::relocType rtype,
8903                             int format) {
8904   if (rtype == relocInfo::none) {
8905     emit_int64(data);
8906   } else {
8907     emit_data64(data, Relocation::spec_simple(rtype), format);
8908   }
8909 }
8910 
8911 void Assembler::emit_data64(jlong data,
8912                             RelocationHolder const& rspec,
8913                             int format) {
8914   assert(imm_operand == 0, "default format must be immediate in this file");
8915   assert(imm_operand == format, "must be immediate");
8916   assert(inst_mark() != NULL, "must be inside InstructionMark");
8917   // Do not use AbstractAssembler::relocate, which is not intended for
8918   // embedded words.  Instead, relocate to the enclosing instruction.
8919   code_section()->relocate(inst_mark(), rspec, format);
8920 #ifdef ASSERT
8921   check_relocation(rspec, format);
8922 #endif
8923   emit_int64(data);
8924 }
8925 
8926 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
8927   if (reg_enc >= 8) {
8928     prefix(REX_B);
8929     reg_enc -= 8;
8930   } else if (byteinst && reg_enc >= 4) {
8931     prefix(REX);
8932   }
8933   return reg_enc;
8934 }
8935 
8936 int Assembler::prefixq_and_encode(int reg_enc) {
8937   if (reg_enc < 8) {
8938     prefix(REX_W);
8939   } else {
8940     prefix(REX_WB);
8941     reg_enc -= 8;
8942   }
8943   return reg_enc;
8944 }
8945 
8946 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
8947   if (dst_enc < 8) {
8948     if (src_enc >= 8) {
8949       prefix(REX_B);
8950       src_enc -= 8;
8951     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
8952       prefix(REX);
8953     }
8954   } else {
8955     if (src_enc < 8) {
8956       prefix(REX_R);
8957     } else {
8958       prefix(REX_RB);
8959       src_enc -= 8;
8960     }
8961     dst_enc -= 8;
8962   }
8963   return dst_enc << 3 | src_enc;
8964 }
8965 
8966 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
8967   if (dst_enc < 8) {
8968     if (src_enc < 8) {
8969       prefix(REX_W);
8970     } else {
8971       prefix(REX_WB);
8972       src_enc -= 8;
8973     }
8974   } else {
8975     if (src_enc < 8) {
8976       prefix(REX_WR);
8977     } else {
8978       prefix(REX_WRB);
8979       src_enc -= 8;
8980     }
8981     dst_enc -= 8;
8982   }
8983   return dst_enc << 3 | src_enc;
8984 }
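
// The REX prefixes chosen above follow the 0100WRXB layout: W selects 64-bit
// operand size, R extends the ModRM reg field, X extends the SIB index, and
// B extends the ModRM r/m (or SIB base) field. The value returned by the
// prefix(q)_and_encode helpers is the low six ModRM bits (reg << 3 | rm) with
// the extension bits already stripped; callers OR in 0xC0 to form the final
// register-register ModRM byte.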
8985 
8986 void Assembler::prefix(Register reg) {
8987   if (reg->encoding() >= 8) {
8988     prefix(REX_B);
8989   }
8990 }
8991 
8992 void Assembler::prefix(Register dst, Register src, Prefix p) {
8993   if (src->encoding() >= 8) {
8994     p = (Prefix)(p | REX_B);
8995   }
8996   if (dst->encoding() >= 8) {
8997     p = (Prefix)( p | REX_R);
8998   }
8999   if (p != Prefix_EMPTY) {
9000     // do not generate an empty prefix
9001     prefix(p);
9002   }
9003 }
9004 
9005 void Assembler::prefix(Register dst, Address adr, Prefix p) {
9006   if (adr.base_needs_rex()) {
9007     if (adr.index_needs_rex()) {
9008       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
9009     } else {
9010       prefix(REX_B);
9011     }
9012   } else {
9013     if (adr.index_needs_rex()) {
9014       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
9015     }
9016   }
9017   if (dst->encoding() >= 8) {
9018     p = (Prefix)(p | REX_R);
9019   }
9020   if (p != Prefix_EMPTY) {
9021     // do not generate an empty prefix
9022     prefix(p);
9023   }
9024 }
9025 
9026 void Assembler::prefix(Address adr) {
9027   if (adr.base_needs_rex()) {
9028     if (adr.index_needs_rex()) {
9029       prefix(REX_XB);
9030     } else {
9031       prefix(REX_B);
9032     }
9033   } else {
9034     if (adr.index_needs_rex()) {
9035       prefix(REX_X);
9036     }
9037   }
9038 }
9039 
9040 void Assembler::prefixq(Address adr) {
9041   if (adr.base_needs_rex()) {
9042     if (adr.index_needs_rex()) {
9043       prefix(REX_WXB);
9044     } else {
9045       prefix(REX_WB);
9046     }
9047   } else {
9048     if (adr.index_needs_rex()) {
9049       prefix(REX_WX);
9050     } else {
9051       prefix(REX_W);
9052     }
9053   }
9054 }
9055 
9056 
9057 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
9058   if (reg->encoding() < 8) {
9059     if (adr.base_needs_rex()) {
9060       if (adr.index_needs_rex()) {
9061         prefix(REX_XB);
9062       } else {
9063         prefix(REX_B);
9064       }
9065     } else {
9066       if (adr.index_needs_rex()) {
9067         prefix(REX_X);
9068       } else if (byteinst && reg->encoding() >= 4 ) {
9069         prefix(REX);
9070       }
9071     }
9072   } else {
9073     if (adr.base_needs_rex()) {
9074       if (adr.index_needs_rex()) {
9075         prefix(REX_RXB);
9076       } else {
9077         prefix(REX_RB);
9078       }
9079     } else {
9080       if (adr.index_needs_rex()) {
9081         prefix(REX_RX);
9082       } else {
9083         prefix(REX_R);
9084       }
9085     }
9086   }
9087 }
9088 
9089 void Assembler::prefixq(Address adr, Register src) {
9090   if (src->encoding() < 8) {
9091     if (adr.base_needs_rex()) {
9092       if (adr.index_needs_rex()) {
9093         prefix(REX_WXB);
9094       } else {
9095         prefix(REX_WB);
9096       }
9097     } else {
9098       if (adr.index_needs_rex()) {
9099         prefix(REX_WX);
9100       } else {
9101         prefix(REX_W);
9102       }
9103     }
9104   } else {
9105     if (adr.base_needs_rex()) {
9106       if (adr.index_needs_rex()) {
9107         prefix(REX_WRXB);
9108       } else {
9109         prefix(REX_WRB);
9110       }
9111     } else {
9112       if (adr.index_needs_rex()) {
9113         prefix(REX_WRX);
9114       } else {
9115         prefix(REX_WR);
9116       }
9117     }
9118   }
9119 }
9120 
9121 void Assembler::prefix(Address adr, XMMRegister reg) {
9122   if (reg->encoding() < 8) {
9123     if (adr.base_needs_rex()) {
9124       if (adr.index_needs_rex()) {
9125         prefix(REX_XB);
9126       } else {
9127         prefix(REX_B);
9128       }
9129     } else {
9130       if (adr.index_needs_rex()) {
9131         prefix(REX_X);
9132       }
9133     }
9134   } else {
9135     if (adr.base_needs_rex()) {
9136       if (adr.index_needs_rex()) {
9137         prefix(REX_RXB);
9138       } else {
9139         prefix(REX_RB);
9140       }
9141     } else {
9142       if (adr.index_needs_rex()) {
9143         prefix(REX_RX);
9144       } else {
9145         prefix(REX_R);
9146       }
9147     }
9148   }
9149 }
9150 
9151 void Assembler::prefixq(Address adr, XMMRegister src) {
9152   if (src->encoding() < 8) {
9153     if (adr.base_needs_rex()) {
9154       if (adr.index_needs_rex()) {
9155         prefix(REX_WXB);
9156       } else {
9157         prefix(REX_WB);
9158       }
9159     } else {
9160       if (adr.index_needs_rex()) {
9161         prefix(REX_WX);
9162       } else {
9163         prefix(REX_W);
9164       }
9165     }
9166   } else {
9167     if (adr.base_needs_rex()) {
9168       if (adr.index_needs_rex()) {
9169         prefix(REX_WRXB);
9170       } else {
9171         prefix(REX_WRB);
9172       }
9173     } else {
9174       if (adr.index_needs_rex()) {
9175         prefix(REX_WRX);
9176       } else {
9177         prefix(REX_WR);
9178       }
9179     }
9180   }
9181 }
9182 
9183 void Assembler::adcq(Register dst, int32_t imm32) {
9184   (void) prefixq_and_encode(dst->encoding());
9185   emit_arith(0x81, 0xD0, dst, imm32);
9186 }
9187 
9188 void Assembler::adcq(Register dst, Address src) {
9189   InstructionMark im(this);
9190   prefixq(src, dst);
9191   emit_int8(0x13);
9192   emit_operand(dst, src);
9193 }
9194 
9195 void Assembler::adcq(Register dst, Register src) {
9196   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9197   emit_arith(0x13, 0xC0, dst, src);
9198 }
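
// In the emit_arith calls used throughout this section, the second argument
// is the base ModRM byte of the immediate form, 0xC0 | (ext << 3), where
// 'ext' is the instruction's opcode extension: adc is /2 (0xD0 above), and
// below add is /0 (0xC0), and is /4 (0xE0), cmp is /7 (0xF8), or is /1 (0xC8).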
9199 
9200 void Assembler::addq(Address dst, int32_t imm32) {
9201   InstructionMark im(this);
9202   prefixq(dst);
9203   emit_arith_operand(0x81, rax, dst, imm32);
9204 }
9205 
9206 void Assembler::addq(Address dst, Register src) {
9207   InstructionMark im(this);
9208   prefixq(dst, src);
9209   emit_int8(0x01);
9210   emit_operand(src, dst);
9211 }
9212 
9213 void Assembler::addq(Register dst, int32_t imm32) {
9214   (void) prefixq_and_encode(dst->encoding());
9215   emit_arith(0x81, 0xC0, dst, imm32);
9216 }
9217 
9218 void Assembler::addq(Register dst, Address src) {
9219   InstructionMark im(this);
9220   prefixq(src, dst);
9221   emit_int8(0x03);
9222   emit_operand(dst, src);
9223 }
9224 
9225 void Assembler::addq(Register dst, Register src) {
9226   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9227   emit_arith(0x03, 0xC0, dst, src);
9228 }
9229 
9230 void Assembler::adcxq(Register dst, Register src) {
9231   //assert(VM_Version::supports_adx(), "adx instructions not supported");
9232   emit_int8((unsigned char)0x66);
9233   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9234   emit_int8(0x0F);
9235   emit_int8(0x38);
9236   emit_int8((unsigned char)0xF6);
9237   emit_int8((unsigned char)(0xC0 | encode));
9238 }
9239 
9240 void Assembler::adoxq(Register dst, Register src) {
9241   //assert(VM_Version::supports_adx(), "adx instructions not supported");
9242   emit_int8((unsigned char)0xF3);
9243   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9244   emit_int8(0x0F);
9245   emit_int8(0x38);
9246   emit_int8((unsigned char)0xF6);
9247   emit_int8((unsigned char)(0xC0 | encode));
9248 }
9249 
9250 void Assembler::andq(Address dst, int32_t imm32) {
9251   InstructionMark im(this);
9252   prefixq(dst);
9253   emit_int8((unsigned char)0x81);
9254   emit_operand(rsp, dst, 4);
9255   emit_int32(imm32);
9256 }
9257 
9258 void Assembler::andq(Register dst, int32_t imm32) {
9259   (void) prefixq_and_encode(dst->encoding());
9260   emit_arith(0x81, 0xE0, dst, imm32);
9261 }
9262 
9263 void Assembler::andq(Register dst, Address src) {
9264   InstructionMark im(this);
9265   prefixq(src, dst);
9266   emit_int8(0x23);
9267   emit_operand(dst, src);
9268 }
9269 
9270 void Assembler::andq(Register dst, Register src) {
9271   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9272   emit_arith(0x23, 0xC0, dst, src);
9273 }
9274 
9275 void Assembler::andnq(Register dst, Register src1, Register src2) {
9276   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9277   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9278   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9279   emit_int8((unsigned char)0xF2);
9280   emit_int8((unsigned char)(0xC0 | encode));
9281 }
9282 
9283 void Assembler::andnq(Register dst, Register src1, Address src2) {
9284   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9285   InstructionMark im(this);
9286   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9287   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9288   emit_int8((unsigned char)0xF2);
9289   emit_operand(dst, src2);
9290 }
9291 
9292 void Assembler::bsfq(Register dst, Register src) {
9293   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9294   emit_int8(0x0F);
9295   emit_int8((unsigned char)0xBC);
9296   emit_int8((unsigned char)(0xC0 | encode));
9297 }
9298 
9299 void Assembler::bsrq(Register dst, Register src) {
9300   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9301   emit_int8(0x0F);
9302   emit_int8((unsigned char)0xBD);
9303   emit_int8((unsigned char)(0xC0 | encode));
9304 }
9305 
9306 void Assembler::bswapq(Register reg) {
9307   int encode = prefixq_and_encode(reg->encoding());
9308   emit_int8(0x0F);
9309   emit_int8((unsigned char)(0xC8 | encode));
9310 }
9311 
9312 void Assembler::blsiq(Register dst, Register src) {
9313   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9314   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9315   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9316   emit_int8((unsigned char)0xF3);
9317   emit_int8((unsigned char)(0xC0 | encode));
9318 }
9319 
9320 void Assembler::blsiq(Register dst, Address src) {
9321   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9322   InstructionMark im(this);
9323   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9324   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9325   emit_int8((unsigned char)0xF3);
9326   emit_operand(rbx, src);
9327 }
9328 
9329 void Assembler::blsmskq(Register dst, Register src) {
9330   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9331   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9332   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9333   emit_int8((unsigned char)0xF3);
9334   emit_int8((unsigned char)(0xC0 | encode));
9335 }
9336 
9337 void Assembler::blsmskq(Register dst, Address src) {
9338   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9339   InstructionMark im(this);
9340   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9341   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9342   emit_int8((unsigned char)0xF3);
9343   emit_operand(rdx, src);
9344 }
9345 
9346 void Assembler::blsrq(Register dst, Register src) {
9347   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9348   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9349   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9350   emit_int8((unsigned char)0xF3);
9351   emit_int8((unsigned char)(0xC0 | encode));
9352 }
9353 
9354 void Assembler::blsrq(Register dst, Address src) {
9355   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9356   InstructionMark im(this);
9357   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9358   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9359   emit_int8((unsigned char)0xF3);
9360   emit_operand(rcx, src);
9361 }
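
// blsi, blsmsk and blsr share the single VEX-encoded opcode 0xF3 in the 0F38
// map and are distinguished by the ModRM reg field: /3, /2 and /1. That is
// why rbx (encoding 3), rdx (encoding 2) and rcx (encoding 1) are passed as
// the reg-field placeholder in the three pairs of emitters above.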
9362 
9363 void Assembler::cdqq() {
9364   prefix(REX_W);
9365   emit_int8((unsigned char)0x99);
9366 }
9367 
9368 void Assembler::clflush(Address adr) {
9369   prefix(adr);
9370   emit_int8(0x0F);
9371   emit_int8((unsigned char)0xAE);
9372   emit_operand(rdi, adr);
9373 }
9374 
9375 void Assembler::cmovq(Condition cc, Register dst, Register src) {
9376   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9377   emit_int8(0x0F);
9378   emit_int8(0x40 | cc);
9379   emit_int8((unsigned char)(0xC0 | encode));
9380 }
9381 
9382 void Assembler::cmovq(Condition cc, Register dst, Address src) {
9383   InstructionMark im(this);
9384   prefixq(src, dst);
9385   emit_int8(0x0F);
9386   emit_int8(0x40 | cc);
9387   emit_operand(dst, src);
9388 }
9389 
9390 void Assembler::cmpq(Address dst, int32_t imm32) {
9391   InstructionMark im(this);
9392   prefixq(dst);
9393   emit_int8((unsigned char)0x81);
9394   emit_operand(rdi, dst, 4);
9395   emit_int32(imm32);
9396 }
9397 
9398 void Assembler::cmpq(Register dst, int32_t imm32) {
9399   (void) prefixq_and_encode(dst->encoding());
9400   emit_arith(0x81, 0xF8, dst, imm32);
9401 }
9402 
9403 void Assembler::cmpq(Address dst, Register src) {
9404   InstructionMark im(this);
9405   prefixq(dst, src);
9406   emit_int8(0x39); // CMP r/m64, r64 (MR form); 0x3B would compare src with [dst]
9407   emit_operand(src, dst);
9408 }
9409 
9410 void Assembler::cmpq(Register dst, Register src) {
9411   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9412   emit_arith(0x3B, 0xC0, dst, src);
9413 }
9414 
9415 void Assembler::cmpq(Register dst, Address src) {
9416   InstructionMark im(this);
9417   prefixq(src, dst);
9418   emit_int8(0x3B);
9419   emit_operand(dst, src);
9420 }
9421 
9422 void Assembler::cmpxchgq(Register reg, Address adr) {
9423   InstructionMark im(this);
9424   prefixq(adr, reg);
9425   emit_int8(0x0F);
9426   emit_int8((unsigned char)0xB1);
9427   emit_operand(reg, adr);
9428 }
9429 
9430 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
9431   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9432   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9433   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
9434   emit_int8(0x2A);
9435   emit_int8((unsigned char)(0xC0 | encode));
9436 }
9437 
9438 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
9439   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9440   InstructionMark im(this);
9441   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9442   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
9443   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
9444   emit_int8(0x2A);
9445   emit_operand(dst, src);
9446 }
9447 
9448 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
9449   NOT_LP64(assert(VM_Version::supports_sse(), ""));
9450   InstructionMark im(this);
9451   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9452   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
9453   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
9454   emit_int8(0x2A);
9455   emit_operand(dst, src);
9456 }
9457 
9458 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
9459   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9460   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9461   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
9462   emit_int8(0x2C);
9463   emit_int8((unsigned char)(0xC0 | encode));
9464 }
9465 
9466 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
9467   NOT_LP64(assert(VM_Version::supports_sse(), ""));
9468   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9469   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
9470   emit_int8(0x2C);
9471   emit_int8((unsigned char)(0xC0 | encode));
9472 }
9473 
9474 void Assembler::decl(Register dst) {
9475   // Don't use it directly. Use MacroAssembler::decrementl() instead.
9476   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9477   int encode = prefix_and_encode(dst->encoding());
9478   emit_int8((unsigned char)0xFF);
9479   emit_int8((unsigned char)(0xC8 | encode));
9480 }
9481 
9482 void Assembler::decq(Register dst) {
9483   // Don't use it directly. Use MacroAssembler::decrementq() instead.
9484   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9485   int encode = prefixq_and_encode(dst->encoding());
9486   emit_int8((unsigned char)0xFF);
9487   emit_int8((unsigned char)(0xC8 | encode));
9488 }
9489 
9490 void Assembler::decq(Address dst) {
9491   // Don't use it directly. Use MacroAssembler::decrementq() instead.
9492   InstructionMark im(this);
9493   prefixq(dst);
9494   emit_int8((unsigned char)0xFF);
9495   emit_operand(rcx, dst);
9496 }
9497 
9498 void Assembler::fxrstor(Address src) {
9499   prefixq(src);
9500   emit_int8(0x0F);
9501   emit_int8((unsigned char)0xAE);
9502   emit_operand(as_Register(1), src);
9503 }
9504 
9505 void Assembler::xrstor(Address src) {
9506   prefixq(src);
9507   emit_int8(0x0F);
9508   emit_int8((unsigned char)0xAE);
9509   emit_operand(as_Register(5), src);
9510 }
9511 
9512 void Assembler::fxsave(Address dst) {
9513   prefixq(dst);
9514   emit_int8(0x0F);
9515   emit_int8((unsigned char)0xAE);
9516   emit_operand(as_Register(0), dst);
9517 }
9518 
9519 void Assembler::xsave(Address dst) {
9520   prefixq(dst);
9521   emit_int8(0x0F);
9522   emit_int8((unsigned char)0xAE);
9523   emit_operand(as_Register(4), dst);
9524 }
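
// fxsave/fxrstor/xsave/xrstor (and clflush further up) all live in the 0F AE
// group and are selected by the ModRM reg field: fxsave /0, fxrstor /1,
// xsave /4, xrstor /5 and clflush /7, matching the as_Register(0/1/4/5) and
// rdi (encoding 7) arguments handed to emit_operand.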
9525 
9526 void Assembler::idivq(Register src) {
9527   int encode = prefixq_and_encode(src->encoding());
9528   emit_int8((unsigned char)0xF7);
9529   emit_int8((unsigned char)(0xF8 | encode));
9530 }
9531 
9532 void Assembler::imulq(Register dst, Register src) {
9533   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9534   emit_int8(0x0F);
9535   emit_int8((unsigned char)0xAF);
9536   emit_int8((unsigned char)(0xC0 | encode));
9537 }
9538 
9539 void Assembler::imulq(Register dst, Register src, int value) {
9540   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9541   if (is8bit(value)) {
9542     emit_int8(0x6B);
9543     emit_int8((unsigned char)(0xC0 | encode));
9544     emit_int8(value & 0xFF);
9545   } else {
9546     emit_int8(0x69);
9547     emit_int8((unsigned char)(0xC0 | encode));
9548     emit_int32(value);
9549   }
9550 }
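
// imul with an immediate has two encodings: 0x6B takes a sign-extended imm8
// and 0x69 a full imm32, so the emitter above picks the short form whenever
// the value fits in eight signed bits. For example, imulq(rax, rbx, 10)
// emits the imm8 form of the three-operand multiply, rax = rbx * 10.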
9551 
9552 void Assembler::imulq(Register dst, Address src) {
9553   InstructionMark im(this);
9554   prefixq(src, dst);
9555   emit_int8(0x0F);
9556   emit_int8((unsigned char) 0xAF);
9557   emit_operand(dst, src);
9558 }
9559 
9560 void Assembler::incl(Register dst) {
9561   // Don't use it directly. Use MacroAssembler::incrementl() instead.
9562   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9563   int encode = prefix_and_encode(dst->encoding());
9564   emit_int8((unsigned char)0xFF);
9565   emit_int8((unsigned char)(0xC0 | encode));
9566 }
9567 
9568 void Assembler::incq(Register dst) {
9569   // Don't use it directly. Use MacroAssembler::incrementq() instead.
9570   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9571   int encode = prefixq_and_encode(dst->encoding());
9572   emit_int8((unsigned char)0xFF);
9573   emit_int8((unsigned char)(0xC0 | encode));
9574 }
9575 
9576 void Assembler::incq(Address dst) {
9577   // Don't use it directly. Use MacroAssembler::incrementq() instead.
9578   InstructionMark im(this);
9579   prefixq(dst);
9580   emit_int8((unsigned char)0xFF);
9581   emit_operand(rax, dst);
9582 }
9583 
9584 void Assembler::lea(Register dst, Address src) {
9585   leaq(dst, src);
9586 }
9587 
9588 void Assembler::leaq(Register dst, Address src) {
9589   InstructionMark im(this);
9590   prefixq(src, dst);
9591   emit_int8((unsigned char)0x8D);
9592   emit_operand(dst, src);
9593 }
9594 
9595 void Assembler::mov64(Register dst, int64_t imm64) {
9596   InstructionMark im(this);
9597   int encode = prefixq_and_encode(dst->encoding());
9598   emit_int8((unsigned char)(0xB8 | encode));
9599   emit_int64(imm64);
9600 }
9601 
9602 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
9603   InstructionMark im(this);
9604   int encode = prefixq_and_encode(dst->encoding());
9605   emit_int8((unsigned char)(0xB8 | encode));
9606   emit_data64(imm64, rspec);
9607 }
9608 
9609 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
9610   InstructionMark im(this);
9611   int encode = prefix_and_encode(dst->encoding());
9612   emit_int8((unsigned char)(0xB8 | encode));
9613   emit_data((int)imm32, rspec, narrow_oop_operand);
9614 }
9615 
9616 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
9617   InstructionMark im(this);
9618   prefix(dst);
9619   emit_int8((unsigned char)0xC7);
9620   emit_operand(rax, dst, 4);
9621   emit_data((int)imm32, rspec, narrow_oop_operand);
9622 }
9623 
9624 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
9625   InstructionMark im(this);
9626   int encode = prefix_and_encode(src1->encoding());
9627   emit_int8((unsigned char)0x81);
9628   emit_int8((unsigned char)(0xF8 | encode));
9629   emit_data((int)imm32, rspec, narrow_oop_operand);
9630 }
9631 
9632 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
9633   InstructionMark im(this);
9634   prefix(src1);
9635   emit_int8((unsigned char)0x81);
9636   emit_operand(rax, src1, 4);
9637   emit_data((int)imm32, rspec, narrow_oop_operand);
9638 }
9639 
9640 void Assembler::lzcntq(Register dst, Register src) {
9641   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
9642   emit_int8((unsigned char)0xF3);
9643   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9644   emit_int8(0x0F);
9645   emit_int8((unsigned char)0xBD);
9646   emit_int8((unsigned char)(0xC0 | encode));
9647 }
9648 
9649 void Assembler::movdq(XMMRegister dst, Register src) {
9650   // table D-1 says MMX/SSE2
9651   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9652   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9653   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
9654   emit_int8(0x6E);
9655   emit_int8((unsigned char)(0xC0 | encode));
9656 }
9657 
9658 void Assembler::movdq(Register dst, XMMRegister src) {
9659   // table D-1 says MMX/SSE2
9660   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9661   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9662   // swap src/dst to get correct prefix
9663   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
9664   emit_int8(0x7E);
9665   emit_int8((unsigned char)(0xC0 | encode));
9666 }
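
// The two movdq overloads are the opposite data directions of one base
// instruction: 66 (REX.W) 0F 6E moves GPR -> XMM and 66 (REX.W) 0F 7E moves
// XMM -> GPR, and both encode the XMM register in the ModRM reg field. That
// is why the store form passes src where the load form passes dst: only the
// prefix/ModRM computation is swapped, not the operands themselves.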
9667 
9668 void Assembler::movq(Register dst, Register src) {
9669   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9670   emit_int8((unsigned char)0x8B);
9671   emit_int8((unsigned char)(0xC0 | encode));
9672 }
9673 
9674 void Assembler::movq(Register dst, Address src) {
9675   InstructionMark im(this);
9676   prefixq(src, dst);
9677   emit_int8((unsigned char)0x8B);
9678   emit_operand(dst, src);
9679 }
9680 
9681 void Assembler::movq(Address dst, Register src) {
9682   InstructionMark im(this);
9683   prefixq(dst, src);
9684   emit_int8((unsigned char)0x89);
9685   emit_operand(src, dst);
9686 }
9687 
9688 void Assembler::movsbq(Register dst, Address src) {
9689   InstructionMark im(this);
9690   prefixq(src, dst);
9691   emit_int8(0x0F);
9692   emit_int8((unsigned char)0xBE);
9693   emit_operand(dst, src);
9694 }
9695 
9696 void Assembler::movsbq(Register dst, Register src) {
9697   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9698   emit_int8(0x0F);
9699   emit_int8((unsigned char)0xBE);
9700   emit_int8((unsigned char)(0xC0 | encode));
9701 }
9702 
9703 void Assembler::movslq(Register dst, int32_t imm32) {
9704   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
9705   // and movslq(r8, 3) as movl     $0x0000000048000000,(%rbx);
9706   // as a result we shouldn't use this form until it has been tested at runtime...
9707   ShouldNotReachHere();
9708   InstructionMark im(this);
9709   int encode = prefixq_and_encode(dst->encoding());
9710   emit_int8((unsigned char)(0xC7 | encode));
9711   emit_int32(imm32);
9712 }
9713 
9714 void Assembler::movslq(Address dst, int32_t imm32) {
9715   assert(is_simm32(imm32), "lost bits");
9716   InstructionMark im(this);
9717   prefixq(dst);
9718   emit_int8((unsigned char)0xC7);
9719   emit_operand(rax, dst, 4);
9720   emit_int32(imm32);
9721 }
9722 
9723 void Assembler::movslq(Register dst, Address src) {
9724   InstructionMark im(this);
9725   prefixq(src, dst);
9726   emit_int8(0x63);
9727   emit_operand(dst, src);
9728 }
9729 
9730 void Assembler::movslq(Register dst, Register src) {
9731   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9732   emit_int8(0x63);
9733   emit_int8((unsigned char)(0xC0 | encode));
9734 }
9735 
9736 void Assembler::movswq(Register dst, Address src) {
9737   InstructionMark im(this);
9738   prefixq(src, dst);
9739   emit_int8(0x0F);
9740   emit_int8((unsigned char)0xBF);
9741   emit_operand(dst, src);
9742 }
9743 
9744 void Assembler::movswq(Register dst, Register src) {
9745   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9746   emit_int8((unsigned char)0x0F);
9747   emit_int8((unsigned char)0xBF);
9748   emit_int8((unsigned char)(0xC0 | encode));
9749 }
9750 
9751 void Assembler::movzbq(Register dst, Address src) {
9752   InstructionMark im(this);
9753   prefixq(src, dst);
9754   emit_int8((unsigned char)0x0F);
9755   emit_int8((unsigned char)0xB6);
9756   emit_operand(dst, src);
9757 }
9758 
9759 void Assembler::movzbq(Register dst, Register src) {
9760   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9761   emit_int8(0x0F);
9762   emit_int8((unsigned char)0xB6);
9763   emit_int8((unsigned char)(0xC0 | encode));
9764 }
9765 
9766 void Assembler::movzwq(Register dst, Address src) {
9767   InstructionMark im(this);
9768   prefixq(src, dst);
9769   emit_int8((unsigned char)0x0F);
9770   emit_int8((unsigned char)0xB7);
9771   emit_operand(dst, src);
9772 }
9773 
9774 void Assembler::movzwq(Register dst, Register src) {
9775   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9776   emit_int8((unsigned char)0x0F);
9777   emit_int8((unsigned char)0xB7);
9778   emit_int8((unsigned char)(0xC0 | encode));
9779 }
9780 
9781 void Assembler::mulq(Address src) {
9782   InstructionMark im(this);
9783   prefixq(src);
9784   emit_int8((unsigned char)0xF7);
9785   emit_operand(rsp, src);
9786 }
9787 
9788 void Assembler::mulq(Register src) {
9789   int encode = prefixq_and_encode(src->encoding());
9790   emit_int8((unsigned char)0xF7);
9791   emit_int8((unsigned char)(0xE0 | encode));
9792 }
9793 
9794 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
9795   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
9796   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9797   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
9798   emit_int8((unsigned char)0xF6);
9799   emit_int8((unsigned char)(0xC0 | encode));
9800 }
9801 
9802 void Assembler::negq(Register dst) {
9803   int encode = prefixq_and_encode(dst->encoding());
9804   emit_int8((unsigned char)0xF7);
9805   emit_int8((unsigned char)(0xD8 | encode));
9806 }
9807 
9808 void Assembler::notq(Register dst) {
9809   int encode = prefixq_and_encode(dst->encoding());
9810   emit_int8((unsigned char)0xF7);
9811   emit_int8((unsigned char)(0xD0 | encode));
9812 }
9813 
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rcx, dst, 4);
  emit_int32(imm32);
}

void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}

void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0B);
  emit_operand(dst, src);
}

void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

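// 64-bit mode has no POPA instruction, so every GPR is reloaded explicitly.
// The rsp slot is skipped; rsp itself is restored by the final addq.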
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}

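// POPCNT is encoded F3 (REX.W) 0F B8; the mandatory F3 prefix must precede
// the REX prefix, so it is emitted before prefixq().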
void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_int8((unsigned char)0xF3);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src);
}

void Assembler::popcntq(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}

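// POP r/m64 is 8F /0; rax supplies the /0 opcode extension.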
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x8F);
  emit_operand(rax, dst);
}

void Assembler::pusha() { // 64bit
  // We have to store the original rsp.  The x86-64 ABI reserves the
  // 128 bytes below rsp as a red zone (local scratch), so storing
  // below the current stack pointer is safe.
  movq(Address(rsp, -5 * wordSize), rsp);
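  // The store above lands in the red zone; after the subq below it sits at
  // 11 * wordSize, the slot that popa() skips.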

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}

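// PUSH r/m64 is FF /6; rsi supplies the /6 opcode extension.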
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsi, src);
}

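// 64-bit shifts and rotates accept counts of 0..63, but isShiftCount()
// checks 0..31, so the immediate is halved for the range check.  A count
// of 1 has its own shorter encoding (opcode D1 instead of C1 ib).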
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD0 | encode));
    emit_int8(imm8);
  }
}

void Assembler::rcrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD8 | encode));
    emit_int8(imm8);
  }
}

void Assembler::rorq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xC8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xC8 | encode));
    emit_int8(imm8);
  }
}

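// RORX (BMI2): rotate right by an immediate without reading or writing
// any arithmetic flags.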
void Assembler::rorxq(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

void Assembler::rorxd(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xF8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xF8 | encode));
    emit_int8(imm8);
  }
}

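// The single-operand forms of sarq/shlq/shrq shift by the count in CL
// (opcode D3).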
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode));
}

void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}

void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}

void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}

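// Unlike sarq() and shlq() above, this always emits the C1 ib form, even
// for a shift count of 1.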
void Assembler::shrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xC1);
  emit_int8((unsigned char)(0xE8 | encode));
  emit_int8(imm8);
}

void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
}

void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}

void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4-byte immediate value even if it fits into 8 bits.
void Assembler::subq_imm32(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}

void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}

void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

void Assembler::testq(Register dst, int32_t imm32) {
  // Not using emit_arith because TEST has no sign-extending
  // 8-bit immediate form.
  int encode = dst->encoding();
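  // rax (encoding 0) has a dedicated short form, REX.W A9 id, with no
  // ModRM byte.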
  if (encode == 0) {
    prefix(REX_W);
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_int32(imm32);
}

void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

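// XADD stores the old value of dst in src and the sum in dst; with a lock
// prefix it implements an atomic fetch-and-add.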
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}

void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}

void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}

#endif // !LP64