New src/cpu/x86/vm/assembler

   1 /*
   2  * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "gc/shared/cardTableModRefBS.hpp"
  29 #include "gc/shared/collectedHeap.inline.hpp"
  30 #include "interpreter/interpreter.hpp"
  31 #include "memory/resourceArea.hpp"
  32 #include "prims/methodHandles.hpp"
  33 #include "runtime/biasedLocking.hpp"
  34 #include "runtime/interfaceSupport.hpp"
  35 #include "runtime/objectMonitor.hpp"
  36 #include "runtime/os.hpp"
  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/stubRoutines.hpp"
  39 #include "utilities/macros.hpp"
  40 #if INCLUDE_ALL_GCS
  41 #include "gc/g1/g1CollectedHeap.inline.hpp"
  42 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  43 #include "gc/g1/heapRegion.hpp"
  44 #endif // INCLUDE_ALL_GCS
  45 
  46 #ifdef PRODUCT
  47 #define BLOCK_COMMENT(str) /* nothing */
  48 #define STOP(error) stop(error)
  49 #else
  50 #define BLOCK_COMMENT(str) block_comment(str)
  51 #define STOP(error) block_comment(error); stop(error)
  52 #endif
  53 
  54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  55 // Implementation of AddressLiteral
  56 
  57 // A 2-D table for managing compressed displacement(disp8) on EVEX enabled platforms.
  58 unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  59   // -----------------Table 4.5 -------------------- //
  60   16, 32, 64,  // EVEX_FV(0)
  61   4,  4,  4,   // EVEX_FV(1) - with Evex.b
  62   16, 32, 64,  // EVEX_FV(2) - with Evex.w
  63   8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  64   8,  16, 32,  // EVEX_HV(0)
  65   4,  4,  4,   // EVEX_HV(1) - with Evex.b
  66   // -----------------Table 4.6 -------------------- //
  67   16, 32, 64,  // EVEX_FVM(0)
  68   1,  1,  1,   // EVEX_T1S(0)
  69   2,  2,  2,   // EVEX_T1S(1)
  70   4,  4,  4,   // EVEX_T1S(2)
  71   8,  8,  8,   // EVEX_T1S(3)
  72   4,  4,  4,   // EVEX_T1F(0)
  73   8,  8,  8,   // EVEX_T1F(1)
  74   8,  8,  8,   // EVEX_T2(0)
  75   0,  16, 16,  // EVEX_T2(1)
  76   0,  16, 16,  // EVEX_T4(0)
  77   0,  0,  32,  // EVEX_T4(1)
  78   0,  0,  32,  // EVEX_T8(0)
  79   8,  16, 32,  // EVEX_HVM(0)
  80   4,  8,  16,  // EVEX_QVM(0)
  81   2,  4,  8,   // EVEX_OVM(0)
  82   16, 16, 16,  // EVEX_M128(0)
  83   8,  32, 64,  // EVEX_DUP(0)
  84   0,  0,  0    // EVEX_NTUP
  85 };
  86 
  87 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  88   _is_lval = false;
  89   _target = target;
  90   switch (rtype) {
  91   case relocInfo::oop_type:
  92   case relocInfo::metadata_type:
  93     // Oops are a special case. Normally they would be their own section
  94     // but in cases like icBuffer they are literals in the code stream that
  95     // we don't have a section for. We use none so that we get a literal address
  96     // which is always patchable.
  97     break;
  98   case relocInfo::external_word_type:
  99     _rspec = external_word_Relocation::spec(target);
 100     break;
 101   case relocInfo::internal_word_type:
 102     _rspec = internal_word_Relocation::spec(target);
 103     break;
 104   case relocInfo::opt_virtual_call_type:
 105     _rspec = opt_virtual_call_Relocation::spec();
 106     break;
 107   case relocInfo::static_call_type:
 108     _rspec = static_call_Relocation::spec();
 109     break;
 110   case relocInfo::runtime_call_type:
 111     _rspec = runtime_call_Relocation::spec();
 112     break;
 113   case relocInfo::poll_type:
 114   case relocInfo::poll_return_type:
 115     _rspec = Relocation::spec_simple(rtype);
 116     break;
 117   case relocInfo::none:
 118     break;
 119   default:
 120     ShouldNotReachHere();
 121     break;
 122   }
 123 }
 124 
 125 // Implementation of Address
 126 
 127 #ifdef _LP64
 128 
 129 Address Address::make_array(ArrayAddress adr) {
 130   // Not implementable on 64bit machines
 131   // Should have been handled higher up the call chain.
 132   ShouldNotReachHere();
 133   return Address();
 134 }
 135 
 136 // exceedingly dangerous constructor
 137 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
 138   _base  = noreg;
 139   _index = noreg;
 140   _scale = no_scale;
 141   _disp  = disp;
 142   switch (rtype) {
 143     case relocInfo::external_word_type:
 144       _rspec = external_word_Relocation::spec(loc);
 145       break;
 146     case relocInfo::internal_word_type:
 147       _rspec = internal_word_Relocation::spec(loc);
 148       break;
 149     case relocInfo::runtime_call_type:
 150       // HMM
 151       _rspec = runtime_call_Relocation::spec();
 152       break;
 153     case relocInfo::poll_type:
 154     case relocInfo::poll_return_type:
 155       _rspec = Relocation::spec_simple(rtype);
 156       break;
 157     case relocInfo::none:
 158       break;
 159     default:
 160       ShouldNotReachHere();
 161   }
 162 }
 163 #else // LP64
 164 
 165 Address Address::make_array(ArrayAddress adr) {
 166   AddressLiteral base = adr.base();
 167   Address index = adr.index();
 168   assert(index._disp == 0, "must not have disp"); // maybe it can?
 169   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
 170   array._rspec = base._rspec;
 171   return array;
 172 }
 173 
 174 // exceedingly dangerous constructor
 175 Address::Address(address loc, RelocationHolder spec) {
 176   _base  = noreg;
 177   _index = noreg;
 178   _scale = no_scale;
 179   _disp  = (intptr_t) loc;
 180   _rspec = spec;
 181 }
 182 
 183 #endif // _LP64
 184 
 185 
 186 
 187 // Convert the raw encoding form into the form expected by the constructor for
 188 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 189 // that to noreg for the Address constructor.
 190 Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
 191   RelocationHolder rspec;
 192   if (disp_reloc != relocInfo::none) {
 193     rspec = Relocation::spec_simple(disp_reloc);
 194   }
 195   bool valid_index = index != rsp->encoding();
 196   if (valid_index) {
 197     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 198     madr._rspec = rspec;
 199     return madr;
 200   } else {
 201     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 202     madr._rspec = rspec;
 203     return madr;
 204   }
 205 }
 206 
 207 // Implementation of Assembler
 208 
 209 int AbstractAssembler::code_fill_byte() {
 210   return (u_char)'\xF4'; // hlt
 211 }
 212 
 213 // make this go away someday
 214 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
 215   if (rtype == relocInfo::none)
 216     emit_int32(data);
 217   else
 218     emit_data(data, Relocation::spec_simple(rtype), format);
 219 }
 220 
 221 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
 222   assert(imm_operand == 0, "default format must be immediate in this file");
 223   assert(inst_mark() != NULL, "must be inside InstructionMark");
 224   if (rspec.type() !=  relocInfo::none) {
 225     #ifdef ASSERT
 226       check_relocation(rspec, format);
 227     #endif
 228     // Do not use AbstractAssembler::relocate, which is not intended for
 229     // embedded words.  Instead, relocate to the enclosing instruction.
 230 
 231     // hack. call32 is too wide for mask so use disp32
 232     if (format == call32_operand)
 233       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 234     else
 235       code_section()->relocate(inst_mark(), rspec, format);
 236   }
 237   emit_int32(data);
 238 }
 239 
 240 static int encode(Register r) {
 241   int enc = r->encoding();
 242   if (enc >= 8) {
 243     enc -= 8;
 244   }
 245   return enc;
 246 }
 247 
 248 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
 249   assert(dst->has_byte_register(), "must have byte register");
 250   assert(isByte(op1) && isByte(op2), "wrong opcode");
 251   assert(isByte(imm8), "not a byte");
 252   assert((op1 & 0x01) == 0, "should be 8bit operation");
 253   emit_int8(op1);
 254   emit_int8(op2 | encode(dst));
 255   emit_int8(imm8);
 256 }
 257 
 258 
 259 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
 260   assert(isByte(op1) && isByte(op2), "wrong opcode");
 261   assert((op1 & 0x01) == 1, "should be 32bit operation");
 262   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 263   if (is8bit(imm32)) {
 264     emit_int8(op1 | 0x02); // set sign bit
 265     emit_int8(op2 | encode(dst));
 266     emit_int8(imm32 & 0xFF);
 267   } else {
 268     emit_int8(op1);
 269     emit_int8(op2 | encode(dst));
 270     emit_int32(imm32);
 271   }
 272 }
 273 
 274 // Force generation of a 4 byte immediate value even if it fits into 8bit
 275 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
 276   assert(isByte(op1) && isByte(op2), "wrong opcode");
 277   assert((op1 & 0x01) == 1, "should be 32bit operation");
 278   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 279   emit_int8(op1);
 280   emit_int8(op2 | encode(dst));
 281   emit_int32(imm32);
 282 }
 283 
 284 // immediate-to-memory forms
 285 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
 286   assert((op1 & 0x01) == 1, "should be 32bit operation");
 287   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 288   if (is8bit(imm32)) {
 289     emit_int8(op1 | 0x02); // set sign bit
 290     emit_operand(rm, adr, 1);
 291     emit_int8(imm32 & 0xFF);
 292   } else {
 293     emit_int8(op1);
 294     emit_operand(rm, adr, 4);
 295     emit_int32(imm32);
 296   }
 297 }
 298 
 299 
 300 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 301   assert(isByte(op1) && isByte(op2), "wrong opcode");
 302   emit_int8(op1);
 303   emit_int8(op2 | encode(dst) << 3 | encode(src));
 304 }
 305 
 306 
 307 bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
 308                                            int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
 309   int mod_idx = 0;
 310   // We will test if the displacement fits the compressed format and if so
 311   // apply the compression to the displacment iff the result is8bit.
 312   if (VM_Version::supports_evex() && is_evex_inst) {
 313     switch (cur_tuple_type) {
 314     case EVEX_FV:
 315       if ((cur_encoding & VEX_W) == VEX_W) {
 316         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
 317       } else {
 318         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 319       }
 320       break;
 321 
 322     case EVEX_HV:
 323       mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 324       break;
 325 
 326     case EVEX_FVM:
 327       break;
 328 
 329     case EVEX_T1S:
 330       switch (in_size_in_bits) {
 331       case EVEX_8bit:
 332         break;
 333 
 334       case EVEX_16bit:
 335         mod_idx = 1;
 336         break;
 337 
 338       case EVEX_32bit:
 339         mod_idx = 2;
 340         break;
 341 
 342       case EVEX_64bit:
 343         mod_idx = 3;
 344         break;
 345       }
 346       break;
 347 
 348     case EVEX_T1F:
 349     case EVEX_T2:
 350     case EVEX_T4:
 351       mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
 352       break;
 353 
 354     case EVEX_T8:
 355       break;
 356 
 357     case EVEX_HVM:
 358       break;
 359 
 360     case EVEX_QVM:
 361       break;
 362 
 363     case EVEX_OVM:
 364       break;
 365 
 366     case EVEX_M128:
 367       break;
 368 
 369     case EVEX_DUP:
 370       break;
 371 
 372     default:
 373       assert(0, "no valid evex tuple_table entry");
 374       break;
 375     }
 376 
 377     if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
 378       int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
 379       if ((disp % disp_factor) == 0) {
 380         int new_disp = disp / disp_factor;
 381         if ((-0x80 <= new_disp && new_disp < 0x80)) {
 382           disp = new_disp;
 383         }
 384       } else {
 385         return false;
 386       }
 387     }
 388   }
 389   return (-0x80 <= disp && disp < 0x80);
 390 }
 391 
 392 
 393 bool Assembler::emit_compressed_disp_byte(int &disp) {
 394   int mod_idx = 0;
 395   // We will test if the displacement fits the compressed format and if so
 396   // apply the compression to the displacment iff the result is8bit.
 397   if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
 398     int evex_encoding = _attributes->get_evex_encoding();
 399     int tuple_type = _attributes->get_tuple_type();
 400     switch (tuple_type) {
 401     case EVEX_FV:
 402       if ((evex_encoding & VEX_W) == VEX_W) {
 403         mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
 404       } else {
 405         mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 406       }
 407       break;
 408 
 409     case EVEX_HV:
 410       mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 411       break;
 412 
 413     case EVEX_FVM:
 414       break;
 415 
 416     case EVEX_T1S:
 417       switch (_attributes->get_input_size()) {
 418       case EVEX_8bit:
 419         break;
 420 
 421       case EVEX_16bit:
 422         mod_idx = 1;
 423         break;
 424 
 425       case EVEX_32bit:
 426         mod_idx = 2;
 427         break;
 428 
 429       case EVEX_64bit:
 430         mod_idx = 3;
 431         break;
 432       }
 433       break;
 434 
 435     case EVEX_T1F:
 436     case EVEX_T2:
 437     case EVEX_T4:
 438       mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
 439       break;
 440 
 441     case EVEX_T8:
 442       break;
 443 
 444     case EVEX_HVM:
 445       break;
 446 
 447     case EVEX_QVM:
 448       break;
 449 
 450     case EVEX_OVM:
 451       break;
 452 
 453     case EVEX_M128:
 454       break;
 455 
 456     case EVEX_DUP:
 457       break;
 458 
 459     default:
 460       assert(0, "no valid evex tuple_table entry");
 461       break;
 462     }
 463 
 464     int vector_len = _attributes->get_vector_len();
 465     if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
 466       int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
 467       if ((disp % disp_factor) == 0) {
 468         int new_disp = disp / disp_factor;
 469         if (is8bit(new_disp)) {
 470           disp = new_disp;
 471         }
 472       } else {
 473         return false;
 474       }
 475     }
 476   }
 477   return is8bit(disp);
 478 }
 479 
 480 
 481 void Assembler::emit_operand(Register reg, Register base, Register index,
 482                              Address::ScaleFactor scale, int disp,
 483                              RelocationHolder const& rspec,
 484                              int rip_relative_correction) {
 485   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
 486 
 487   // Encode the registers as needed in the fields they are used in
 488 
 489   int regenc = encode(reg) << 3;
 490   int indexenc = index->is_valid() ? encode(index) << 3 : 0;
 491   int baseenc = base->is_valid() ? encode(base) : 0;
 492 
 493   if (base->is_valid()) {
 494     if (index->is_valid()) {
 495       assert(scale != Address::no_scale, "inconsistent address");
 496       // [base + index*scale + disp]
 497       if (disp == 0 && rtype == relocInfo::none  &&
 498           base != rbp LP64_ONLY(&& base != r13)) {
 499         // [base + index*scale]
 500         // [00 reg 100][ss index base]
 501         assert(index != rsp, "illegal addressing mode");
 502         emit_int8(0x04 | regenc);
 503         emit_int8(scale << 6 | indexenc | baseenc);
 504       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 505         // [base + index*scale + imm8]
 506         // [01 reg 100][ss index base] imm8
 507         assert(index != rsp, "illegal addressing mode");
 508         emit_int8(0x44 | regenc);
 509         emit_int8(scale << 6 | indexenc | baseenc);
 510         emit_int8(disp & 0xFF);
 511       } else {
 512         // [base + index*scale + disp32]
 513         // [10 reg 100][ss index base] disp32
 514         assert(index != rsp, "illegal addressing mode");
 515         emit_int8(0x84 | regenc);
 516         emit_int8(scale << 6 | indexenc | baseenc);
 517         emit_data(disp, rspec, disp32_operand);
 518       }
 519     } else if (base == rsp LP64_ONLY(|| base == r12)) {
 520       // [rsp + disp]
 521       if (disp == 0 && rtype == relocInfo::none) {
 522         // [rsp]
 523         // [00 reg 100][00 100 100]
 524         emit_int8(0x04 | regenc);
 525         emit_int8(0x24);
 526       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 527         // [rsp + imm8]
 528         // [01 reg 100][00 100 100] disp8
 529         emit_int8(0x44 | regenc);
 530         emit_int8(0x24);
 531         emit_int8(disp & 0xFF);
 532       } else {
 533         // [rsp + imm32]
 534         // [10 reg 100][00 100 100] disp32
 535         emit_int8(0x84 | regenc);
 536         emit_int8(0x24);
 537         emit_data(disp, rspec, disp32_operand);
 538       }
 539     } else {
 540       // [base + disp]
 541       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
 542       if (disp == 0 && rtype == relocInfo::none &&
 543           base != rbp LP64_ONLY(&& base != r13)) {
 544         // [base]
 545         // [00 reg base]
 546         emit_int8(0x00 | regenc | baseenc);
 547       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 548         // [base + disp8]
 549         // [01 reg base] disp8
 550         emit_int8(0x40 | regenc | baseenc);
 551         emit_int8(disp & 0xFF);
 552       } else {
 553         // [base + disp32]
 554         // [10 reg base] disp32
 555         emit_int8(0x80 | regenc | baseenc);
 556         emit_data(disp, rspec, disp32_operand);
 557       }
 558     }
 559   } else {
 560     if (index->is_valid()) {
 561       assert(scale != Address::no_scale, "inconsistent address");
 562       // [index*scale + disp]
 563       // [00 reg 100][ss index 101] disp32
 564       assert(index != rsp, "illegal addressing mode");
 565       emit_int8(0x04 | regenc);
 566       emit_int8(scale << 6 | indexenc | 0x05);
 567       emit_data(disp, rspec, disp32_operand);
 568     } else if (rtype != relocInfo::none ) {
 569       // [disp] (64bit) RIP-RELATIVE (32bit) abs
 570       // [00 000 101] disp32
 571 
 572       emit_int8(0x05 | regenc);
 573       // Note that the RIP-rel. correction applies to the generated
 574       // disp field, but _not_ to the target address in the rspec.
 575 
 576       // disp was created by converting the target address minus the pc
 577       // at the start of the instruction. That needs more correction here.
 578       // intptr_t disp = target - next_ip;
 579       assert(inst_mark() != NULL, "must be inside InstructionMark");
 580       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
 581       int64_t adjusted = disp;
 582       // Do rip-rel adjustment for 64bit
 583       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
 584       assert(is_simm32(adjusted),
 585              "must be 32bit offset (RIP relative address)");
 586       emit_data((int32_t) adjusted, rspec, disp32_operand);
 587 
 588     } else {
 589       // 32bit never did this, did everything as the rip-rel/disp code above
 590       // [disp] ABSOLUTE
 591       // [00 reg 100][00 100 101] disp32
 592       emit_int8(0x04 | regenc);
 593       emit_int8(0x25);
 594       emit_data(disp, rspec, disp32_operand);
 595     }
 596   }
 597 }
 598 
 599 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 600                              Address::ScaleFactor scale, int disp,
 601                              RelocationHolder const& rspec) {
 602   if (UseAVX > 2) {
 603     int xreg_enc = reg->encoding();
 604     if (xreg_enc > 15) {
 605       XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
 606       emit_operand((Register)new_reg, base, index, scale, disp, rspec);
 607       return;
 608     }
 609   }
 610   emit_operand((Register)reg, base, index, scale, disp, rspec);
 611 }
 612 
 613 // Secret local extension to Assembler::WhichOperand:
 614 #define end_pc_operand (_WhichOperand_limit)
 615 
 616 address Assembler::locate_operand(address inst, WhichOperand which) {
 617   // Decode the given instruction, and return the address of
 618   // an embedded 32-bit operand word.
 619 
 620   // If "which" is disp32_operand, selects the displacement portion
 621   // of an effective address specifier.
 622   // If "which" is imm64_operand, selects the trailing immediate constant.
 623   // If "which" is call32_operand, selects the displacement of a call or jump.
 624   // Caller is responsible for ensuring that there is such an operand,
 625   // and that it is 32/64 bits wide.
 626 
 627   // If "which" is end_pc_operand, find the end of the instruction.
 628 
 629   address ip = inst;
 630   bool is_64bit = false;
 631 
 632   debug_only(bool has_disp32 = false);
 633   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
 634 
 635   again_after_prefix:
 636   switch (0xFF & *ip++) {
 637 
 638   // These convenience macros generate groups of "case" labels for the switch.
 639 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
 640 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
 641              case (x)+4: case (x)+5: case (x)+6: case (x)+7
 642 #define REP16(x) REP8((x)+0): \
 643               case REP8((x)+8)
 644 
 645   case CS_segment:
 646   case SS_segment:
 647   case DS_segment:
 648   case ES_segment:
 649   case FS_segment:
 650   case GS_segment:
 651     // Seems dubious
 652     LP64_ONLY(assert(false, "shouldn't have that prefix"));
 653     assert(ip == inst+1, "only one prefix allowed");
 654     goto again_after_prefix;
 655 
 656   case 0x67:
 657   case REX:
 658   case REX_B:
 659   case REX_X:
 660   case REX_XB:
 661   case REX_R:
 662   case REX_RB:
 663   case REX_RX:
 664   case REX_RXB:
 665     NOT_LP64(assert(false, "64bit prefixes"));
 666     goto again_after_prefix;
 667 
 668   case REX_W:
 669   case REX_WB:
 670   case REX_WX:
 671   case REX_WXB:
 672   case REX_WR:
 673   case REX_WRB:
 674   case REX_WRX:
 675   case REX_WRXB:
 676     NOT_LP64(assert(false, "64bit prefixes"));
 677     is_64bit = true;
 678     goto again_after_prefix;
 679 
 680   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
 681   case 0x88: // movb a, r
 682   case 0x89: // movl a, r
 683   case 0x8A: // movb r, a
 684   case 0x8B: // movl r, a
 685   case 0x8F: // popl a
 686     debug_only(has_disp32 = true);
 687     break;
 688 
 689   case 0x68: // pushq #32
 690     if (which == end_pc_operand) {
 691       return ip + 4;
 692     }
 693     assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
 694     return ip;                  // not produced by emit_operand
 695 
 696   case 0x66: // movw ... (size prefix)
 697     again_after_size_prefix2:
 698     switch (0xFF & *ip++) {
 699     case REX:
 700     case REX_B:
 701     case REX_X:
 702     case REX_XB:
 703     case REX_R:
 704     case REX_RB:
 705     case REX_RX:
 706     case REX_RXB:
 707     case REX_W:
 708     case REX_WB:
 709     case REX_WX:
 710     case REX_WXB:
 711     case REX_WR:
 712     case REX_WRB:
 713     case REX_WRX:
 714     case REX_WRXB:
 715       NOT_LP64(assert(false, "64bit prefix found"));
 716       goto again_after_size_prefix2;
 717     case 0x8B: // movw r, a
 718     case 0x89: // movw a, r
 719       debug_only(has_disp32 = true);
 720       break;
 721     case 0xC7: // movw a, #16
 722       debug_only(has_disp32 = true);
 723       tail_size = 2;  // the imm16
 724       break;
 725     case 0x0F: // several SSE/SSE2 variants
 726       ip--;    // reparse the 0x0F
 727       goto again_after_prefix;
 728     default:
 729       ShouldNotReachHere();
 730     }
 731     break;
 732 
 733   case REP8(0xB8): // movl/q r, #32/#64(oop?)
 734     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
 735     // these asserts are somewhat nonsensical
 736 #ifndef _LP64
 737     assert(which == imm_operand || which == disp32_operand,
 738            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 739 #else
 740     assert((which == call32_operand || which == imm_operand) && is_64bit ||
 741            which == narrow_oop_operand && !is_64bit,
 742            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 743 #endif // _LP64
 744     return ip;
 745 
 746   case 0x69: // imul r, a, #32
 747   case 0xC7: // movl a, #32(oop?)
 748     tail_size = 4;
 749     debug_only(has_disp32 = true); // has both kinds of operands!
 750     break;
 751 
 752   case 0x0F: // movx..., etc.
 753     switch (0xFF & *ip++) {
 754     case 0x3A: // pcmpestri
 755       tail_size = 1;
 756     case 0x38: // ptest, pmovzxbw
 757       ip++; // skip opcode
 758       debug_only(has_disp32 = true); // has both kinds of operands!
 759       break;
 760 
 761     case 0x70: // pshufd r, r/a, #8
 762       debug_only(has_disp32 = true); // has both kinds of operands!
 763     case 0x73: // psrldq r, #8
 764       tail_size = 1;
 765       break;
 766 
 767     case 0x12: // movlps
 768     case 0x28: // movaps
 769     case 0x2E: // ucomiss
 770     case 0x2F: // comiss
 771     case 0x54: // andps
 772     case 0x55: // andnps
 773     case 0x56: // orps
 774     case 0x57: // xorps
 775     case 0x58: // addpd
 776     case 0x59: // mulpd
 777     case 0x6E: // movd
 778     case 0x7E: // movd
 779     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
 780     case 0xFE: // paddd
 781       debug_only(has_disp32 = true);
 782       break;
 783 
 784     case 0xAD: // shrd r, a, %cl
 785     case 0xAF: // imul r, a
 786     case 0xBE: // movsbl r, a (movsxb)
 787     case 0xBF: // movswl r, a (movsxw)
 788     case 0xB6: // movzbl r, a (movzxb)
 789     case 0xB7: // movzwl r, a (movzxw)
 790     case REP16(0x40): // cmovl cc, r, a
 791     case 0xB0: // cmpxchgb
 792     case 0xB1: // cmpxchg
 793     case 0xC1: // xaddl
 794     case 0xC7: // cmpxchg8
 795     case REP16(0x90): // setcc a
 796       debug_only(has_disp32 = true);
 797       // fall out of the switch to decode the address
 798       break;
 799 
 800     case 0xC4: // pinsrw r, a, #8
 801       debug_only(has_disp32 = true);
 802     case 0xC5: // pextrw r, r, #8
 803       tail_size = 1;  // the imm8
 804       break;
 805 
 806     case 0xAC: // shrd r, a, #8
 807       debug_only(has_disp32 = true);
 808       tail_size = 1;  // the imm8
 809       break;
 810 
 811     case REP16(0x80): // jcc rdisp32
 812       if (which == end_pc_operand)  return ip + 4;
 813       assert(which == call32_operand, "jcc has no disp32 or imm");
 814       return ip;
 815     default:
 816       ShouldNotReachHere();
 817     }
 818     break;
 819 
 820   case 0x81: // addl a, #32; addl r, #32
 821     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 822     // on 32bit in the case of cmpl, the imm might be an oop
 823     tail_size = 4;
 824     debug_only(has_disp32 = true); // has both kinds of operands!
 825     break;
 826 
 827   case 0x83: // addl a, #8; addl r, #8
 828     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 829     debug_only(has_disp32 = true); // has both kinds of operands!
 830     tail_size = 1;
 831     break;
 832 
 833   case 0x9B:
 834     switch (0xFF & *ip++) {
 835     case 0xD9: // fnstcw a
 836       debug_only(has_disp32 = true);
 837       break;
 838     default:
 839       ShouldNotReachHere();
 840     }
 841     break;
 842 
 843   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
 844   case REP4(0x10): // adc...
 845   case REP4(0x20): // and...
 846   case REP4(0x30): // xor...
 847   case REP4(0x08): // or...
 848   case REP4(0x18): // sbb...
 849   case REP4(0x28): // sub...
 850   case 0xF7: // mull a
 851   case 0x8D: // lea r, a
 852   case 0x87: // xchg r, a
 853   case REP4(0x38): // cmp...
 854   case 0x85: // test r, a
 855     debug_only(has_disp32 = true); // has both kinds of operands!
 856     break;
 857 
 858   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
 859   case 0xC6: // movb a, #8
 860   case 0x80: // cmpb a, #8
 861   case 0x6B: // imul r, a, #8
 862     debug_only(has_disp32 = true); // has both kinds of operands!
 863     tail_size = 1; // the imm8
 864     break;
 865 
 866   case 0xC4: // VEX_3bytes
 867   case 0xC5: // VEX_2bytes
 868     assert((UseAVX > 0), "shouldn't have VEX prefix");
 869     assert(ip == inst+1, "no prefixes allowed");
 870     // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
 871     // but they have prefix 0x0F and processed when 0x0F processed above.
 872     //
 873     // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
 874     // instructions (these instructions are not supported in 64-bit mode).
 875     // To distinguish them bits [7:6] are set in the VEX second byte since
 876     // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
 877     // those VEX bits REX and vvvv bits are inverted.
 878     //
 879     // Fortunately C2 doesn't generate these instructions so we don't need
 880     // to check for them in product version.
 881 
 882     // Check second byte
 883     NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
 884 
 885     int vex_opcode;
 886     // First byte
 887     if ((0xFF & *inst) == VEX_3bytes) {
 888       vex_opcode = VEX_OPCODE_MASK & *ip;
 889       ip++; // third byte
 890       is_64bit = ((VEX_W & *ip) == VEX_W);
 891     } else {
 892       vex_opcode = VEX_OPCODE_0F;
 893     }
 894     ip++; // opcode
 895     // To find the end of instruction (which == end_pc_operand).
 896     switch (vex_opcode) {
 897       case VEX_OPCODE_0F:
 898         switch (0xFF & *ip) {
 899         case 0x70: // pshufd r, r/a, #8
 900         case 0x71: // ps[rl|ra|ll]w r, #8
 901         case 0x72: // ps[rl|ra|ll]d r, #8
 902         case 0x73: // ps[rl|ra|ll]q r, #8
 903         case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
 904         case 0xC4: // pinsrw r, r, r/a, #8
 905         case 0xC5: // pextrw r/a, r, #8
 906         case 0xC6: // shufp[s|d] r, r, r/a, #8
 907           tail_size = 1;  // the imm8
 908           break;
 909         }
 910         break;
 911       case VEX_OPCODE_0F_3A:
 912         tail_size = 1;
 913         break;
 914     }
 915     ip++; // skip opcode
 916     debug_only(has_disp32 = true); // has both kinds of operands!
 917     break;
 918 
 919   case 0x62: // EVEX_4bytes
 920     assert((UseAVX > 0), "shouldn't have EVEX prefix");
 921     assert(ip == inst+1, "no prefixes allowed");
 922     // no EVEX collisions, all instructions that have 0x62 opcodes
 923     // have EVEX versions and are subopcodes of 0x66
 924     ip++; // skip P0 and exmaine W in P1
 925     is_64bit = ((VEX_W & *ip) == VEX_W);
 926     ip++; // move to P2
 927     ip++; // skip P2, move to opcode
 928     // To find the end of instruction (which == end_pc_operand).
 929     switch (0xFF & *ip) {
 930     case 0x22: // pinsrd r, r/a, #8
 931     case 0x61: // pcmpestri r, r/a, #8
 932     case 0x70: // pshufd r, r/a, #8
 933     case 0x73: // psrldq r, #8
 934       tail_size = 1;  // the imm8
 935       break;
 936     default:
 937       break;
 938     }
 939     ip++; // skip opcode
 940     debug_only(has_disp32 = true); // has both kinds of operands!
 941     break;
 942 
 943   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 944   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 945   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 946   case 0xDD: // fld_d a; fst_d a; fstp_d a
 947   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 948   case 0xDF: // fild_d a; fistp_d a
 949   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 950   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 951   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 952     debug_only(has_disp32 = true);
 953     break;
 954 
 955   case 0xE8: // call rdisp32
 956   case 0xE9: // jmp  rdisp32
 957     if (which == end_pc_operand)  return ip + 4;
 958     assert(which == call32_operand, "call has no disp32 or imm");
 959     return ip;
 960 
 961   case 0xF0:                    // Lock
 962     assert(os::is_MP(), "only on MP");
 963     goto again_after_prefix;
 964 
 965   case 0xF3:                    // For SSE
 966   case 0xF2:                    // For SSE2
 967     switch (0xFF & *ip++) {
 968     case REX:
 969     case REX_B:
 970     case REX_X:
 971     case REX_XB:
 972     case REX_R:
 973     case REX_RB:
 974     case REX_RX:
 975     case REX_RXB:
 976     case REX_W:
 977     case REX_WB:
 978     case REX_WX:
 979     case REX_WXB:
 980     case REX_WR:
 981     case REX_WRB:
 982     case REX_WRX:
 983     case REX_WRXB:
 984       NOT_LP64(assert(false, "found 64bit prefix"));
 985       ip++;
 986     default:
 987       ip++;
 988     }
 989     debug_only(has_disp32 = true); // has both kinds of operands!
 990     break;
 991 
 992   default:
 993     ShouldNotReachHere();
 994 
 995 #undef REP8
 996 #undef REP16
 997   }
 998 
 999   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
1000 #ifdef _LP64
1001   assert(which != imm_operand, "instruction is not a movq reg, imm64");
1002 #else
1003   // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
1004   assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
1005 #endif // LP64
1006   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
1007 
1008   // parse the output of emit_operand
1009   int op2 = 0xFF & *ip++;
1010   int base = op2 & 0x07;
1011   int op3 = -1;
1012   const int b100 = 4;
1013   const int b101 = 5;
1014   if (base == b100 && (op2 >> 6) != 3) {
1015     op3 = 0xFF & *ip++;
1016     base = op3 & 0x07;   // refetch the base
1017   }
1018   // now ip points at the disp (if any)
1019 
1020   switch (op2 >> 6) {
1021   case 0:
1022     // [00 reg  100][ss index base]
1023     // [00 reg  100][00   100  esp]
1024     // [00 reg base]
1025     // [00 reg  100][ss index  101][disp32]
1026     // [00 reg  101]               [disp32]
1027 
1028     if (base == b101) {
1029       if (which == disp32_operand)
1030         return ip;              // caller wants the disp32
1031       ip += 4;                  // skip the disp32
1032     }
1033     break;
1034 
1035   case 1:
1036     // [01 reg  100][ss index base][disp8]
1037     // [01 reg  100][00   100  esp][disp8]
1038     // [01 reg base]               [disp8]
1039     ip += 1;                    // skip the disp8
1040     break;
1041 
1042   case 2:
1043     // [10 reg  100][ss index base][disp32]
1044     // [10 reg  100][00   100  esp][disp32]
1045     // [10 reg base]               [disp32]
1046     if (which == disp32_operand)
1047       return ip;                // caller wants the disp32
1048     ip += 4;                    // skip the disp32
1049     break;
1050 
1051   case 3:
1052     // [11 reg base]  (not a memory addressing mode)
1053     break;
1054   }
1055 
1056   if (which == end_pc_operand) {
1057     return ip + tail_size;
1058   }
1059 
1060 #ifdef _LP64
1061   assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
1062 #else
1063   assert(which == imm_operand, "instruction has only an imm field");
1064 #endif // LP64
1065   return ip;
1066 }
1067 
1068 address Assembler::locate_next_instruction(address inst) {  // Returns the address just past the instruction that starts at inst.
1069   // Secretly share code with locate_operand:
1070   return locate_operand(inst, end_pc_operand);  // end_pc_operand asks the decoder for the end of the instruction
1071 }
1072 
1073 
1074 #ifdef ASSERT
1075 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {  // Debug-only: asserts that the operand selected by rspec/format sits exactly at pc().
1076   address inst = inst_mark();  // start of the instruction currently being emitted
1077   assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
1078   address opnd;
1079 
1080   Relocation* r = rspec.reloc();
1081   if (r->type() == relocInfo::none) {  // no relocation, nothing to verify
1082     return;
1083   } else if (r->is_call() || format == call32_operand) {
1084     // assert(format == imm32_operand, "cannot specify a nonzero format");
1085     opnd = locate_operand(inst, call32_operand);
1086   } else if (r->is_data()) {
1087     assert(format == imm_operand || format == disp32_operand
1088            LP64_ONLY(|| format == narrow_oop_operand), "format ok");
1089     opnd = locate_operand(inst, (WhichOperand)format);
1090   } else {
1091     assert(format == imm_operand, "cannot specify a format");
1092     return;  // other relocation kinds are not located/checked here
1093   }
1094   assert(opnd == pc(), "must put operand where relocs can find it");  // the relocated field must be the next thing emitted
1095 }
1096 #endif // ASSERT
1097 
1098 void Assembler::emit_operand32(Register reg, Address adr) {  // Emits the operand encoding for reg/adr; asserts that no register involved needs a REX extension.
1099   assert(reg->encoding() < 8, "no extended registers");
1100   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1101   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,  // forward the unpacked Address fields to the component-wise overload
1102                adr._rspec);
1103 }
1104 
1105 void Assembler::emit_operand(Register reg, Address adr,  // Unpacks adr and forwards it, with rip_relative_correction, to the component-wise emit_operand overload.
1106                              int rip_relative_correction) {
1107   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1108                adr._rspec,
1109                rip_relative_correction);
1110 }
1111 
1112 void Assembler::emit_operand(XMMRegister reg, Address adr) {  // XMM-register variant: unpacks adr and forwards to the component-wise emit_operand overload.
1113   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1114                adr._rspec);
1115 }
1116 
1117 // MMX operations
1118 void Assembler::emit_operand(MMXRegister reg, Address adr) {  // MMX variant: the MMX register is cast to Register and emitted through the general-purpose path.
1119   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1120   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
1121 }
1122 
1123 // work around gcc (3.2.1-7a) bug
1124 void Assembler::emit_operand(Address adr, MMXRegister reg) {  // Same as emit_operand(MMXRegister, Address) with the parameters swapped; body is identical.
1125   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1126   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
1127 }
1128 
1129 
1130 void Assembler::emit_farith(int b1, int b2, int i) {  // Emits a two-byte instruction: b1 followed by b2 + i (i in 0..7).
1131   assert(isByte(b1) && isByte(b2), "wrong opcode");
1132   assert(0 <= i &&  i < 8, "illegal stack offset");
1133   emit_int8(b1);
1134   emit_int8(b2 + i);  // the stack offset i is folded into the second byte
1135 }
1136 
1137 
1138 // Now the Assembler instructions (identical for 32/64 bits)
1139 
1140 void Assembler::adcl(Address dst, int32_t imm32) {  // ADC with a 32-bit memory destination and an immediate source.
1141   InstructionMark im(this);
1142   prefix(dst);
1143   emit_arith_operand(0x81, rdx, dst, imm32);  // opcode 0x81; rdx is passed as the register-field argument (opcode extension)
1144 }
1145 
1146 void Assembler::adcl(Address dst, Register src) {  // ADC with a 32-bit memory destination and a register source (opcode 0x11).
1147   InstructionMark im(this);
1148   prefix(dst, src);
1149   emit_int8(0x11);
1150   emit_operand(src, dst);
1151 }
1152 
1153 void Assembler::adcl(Register dst, int32_t imm32) {  // ADC with a 32-bit register destination and an immediate source.
1154   prefix(dst);
1155   emit_arith(0x81, 0xD0, dst, imm32);  // opcode 0x81 with 0xD0 as the base of the second (register-form) byte
1156 }
1157 
1158 void Assembler::adcl(Register dst, Address src) {  // ADC with a 32-bit register destination and a memory source (opcode 0x13).
1159   InstructionMark im(this);
1160   prefix(src, dst);
1161   emit_int8(0x13);
1162   emit_operand(dst, src);
1163 }
1164 
1165 void Assembler::adcl(Register dst, Register src) {  // ADC with 32-bit register destination and register source (opcode 0x13).
1166   (void) prefix_and_encode(dst->encoding(), src->encoding());  // called only for its prefix side effect; the returned encoding is discarded
1167   emit_arith(0x13, 0xC0, dst, src);
1168 }
1169 
1170 void Assembler::addl(Address dst, int32_t imm32) {  // ADD with a 32-bit memory destination and an immediate source.
1171   InstructionMark im(this);
1172   prefix(dst);
1173   emit_arith_operand(0x81, rax, dst, imm32);  // opcode 0x81; rax is passed as the register-field argument (opcode extension)
1174 }
1175 
1176 void Assembler::addb(Address dst, int imm8) {  // ADD with a byte memory destination and an 8-bit immediate (opcode 0x80).
1177   InstructionMark im(this);
1178   prefix(dst);
1179   emit_int8((unsigned char)0x80);
1180   emit_operand(rax, dst, 1);  // third argument is rip_relative_correction; presumably accounts for the 1-byte immediate that follows
1181   emit_int8(imm8);
1182 }
1183 
1184 void Assembler::addw(Address dst, int imm16) {  // ADD with a 16-bit memory destination and a 16-bit immediate.
1185   InstructionMark im(this);
1186   emit_int8(0x66);  // 0x66 prefix byte is emitted before any prefix produced by prefix(dst)
1187   prefix(dst);
1188   emit_int8((unsigned char)0x81);
1189   emit_operand(rax, dst, 2);  // third argument is rip_relative_correction; presumably accounts for the 2-byte immediate that follows
1190   emit_int16(imm16);
1191 }
1192 
1193 void Assembler::addl(Address dst, Register src) {  // ADD with a 32-bit memory destination and a register source (opcode 0x01).
1194   InstructionMark im(this);
1195   prefix(dst, src);
1196   emit_int8(0x01);
1197   emit_operand(src, dst);
1198 }
1199 
1200 void Assembler::addl(Register dst, int32_t imm32) {  // ADD with a 32-bit register destination and an immediate source.
1201   prefix(dst);
1202   emit_arith(0x81, 0xC0, dst, imm32);  // opcode 0x81 with 0xC0 as the base of the second (register-form) byte
1203 }
1204 
1205 void Assembler::addl(Register dst, Address src) {  // ADD with a 32-bit register destination and a memory source (opcode 0x03).
1206   InstructionMark im(this);
1207   prefix(src, dst);
1208   emit_int8(0x03);
1209   emit_operand(dst, src);
1210 }
1211 
1212 void Assembler::addl(Register dst, Register src) {  // ADD with 32-bit register destination and register source (opcode 0x03).
1213   (void) prefix_and_encode(dst->encoding(), src->encoding());  // called only for its prefix side effect; the returned encoding is discarded
1214   emit_arith(0x03, 0xC0, dst, src);
1215 }
1216 
1217 void Assembler::addr_nop_4() {  // Emits the 4-byte address-form NOP: 0F 1F 40 00.
1218   assert(UseAddressNop, "no CPU support");
1219   // 4 bytes: NOP DWORD PTR [EAX+0]
1220   emit_int8(0x0F);
1221   emit_int8(0x1F);
1222   emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
1223   emit_int8(0);    // 8-bits offset (1 byte)
1224 }
1225 
1226 void Assembler::addr_nop_5() {  // Emits the 5-byte address-form NOP: 0F 1F 44 00 00.
1227   assert(UseAddressNop, "no CPU support");
1228   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
1229   emit_int8(0x0F);
1230   emit_int8(0x1F);
1231   emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
1232   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1233   emit_int8(0);    // 8-bits offset (1 byte)
1234 }
1235 
1236 void Assembler::addr_nop_7() {  // Emits the 7-byte address-form NOP: 0F 1F 80 followed by a zero 32-bit offset.
1237   assert(UseAddressNop, "no CPU support");
1238   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
1239   emit_int8(0x0F);
1240   emit_int8(0x1F);
1241   emit_int8((unsigned char)0x80);
1242                    // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
1243   emit_int32(0);   // 32-bits offset (4 bytes)
1244 }
1245 
1246 void Assembler::addr_nop_8() {  // Emits the 8-byte address-form NOP: 0F 1F 84 00 followed by a zero 32-bit offset.
1247   assert(UseAddressNop, "no CPU support");
1248   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
1249   emit_int8(0x0F);
1250   emit_int8(0x1F);
1251   emit_int8((unsigned char)0x84);
1252                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
1253   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1254   emit_int32(0);   // 32-bits offset (4 bytes)
1255 }
1256 
1257 void Assembler::addsd(XMMRegister dst, XMMRegister src) {  // ADDSD, register form: F2-class SIMD prefix, 0F opcode map, opcode 0x58; dst is also the first source.
1258   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1259   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1260   attributes.set_rex_vex_w_reverted();
1261   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1262   emit_int8(0x58);
1263   emit_int8((unsigned char)(0xC0 | encode));  // ModRM byte with mod = 11 (register-direct)
1264 }
1265 
1266 void Assembler::addsd(XMMRegister dst, Address src) {  // ADDSD, memory-source form: F2-class SIMD prefix, 0F opcode map, opcode 0x58.
1267   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1268   InstructionMark im(this);
1269   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1270   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1271   attributes.set_rex_vex_w_reverted();
1272   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1273   emit_int8(0x58);
1274   emit_operand(dst, src);
1275 }
1276 
1277 void Assembler::addss(XMMRegister dst, XMMRegister src) {  // ADDSS, register form: F3-class SIMD prefix, 0F opcode map, opcode 0x58; dst is also the first source.
1278   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1279   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1280   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1281   emit_int8(0x58);
1282   emit_int8((unsigned char)(0xC0 | encode));  // ModRM byte with mod = 11 (register-direct)
1283 }
1284 
1285 void Assembler::addss(XMMRegister dst, Address src) {  // ADDSS, memory-source form: F3-class SIMD prefix, 0F opcode map, opcode 0x58.
1286   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1287   InstructionMark im(this);
1288   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1289   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1290   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1291   emit_int8(0x58);
1292   emit_operand(dst, src);
1293 }
1294 
1295 void Assembler::aesdec(XMMRegister dst, Address src) {  // AESDEC, memory-source form: 66-class SIMD prefix, 0F 38 opcode map, opcode 0xDE.
1296   assert(VM_Version::supports_aes(), "");
1297   InstructionMark im(this);
1298   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1299   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1300   emit_int8((unsigned char)0xDE);
1301   emit_operand(dst, src);
1302 }
1303 
1304 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {  // AESDEC, register form: 66-class SIMD prefix, 0F 38 opcode map, opcode 0xDE.
1305   assert(VM_Version::supports_aes(), "");
1306   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1307   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1308   emit_int8((unsigned char)0xDE);
1309   emit_int8((unsigned char)(0xC0 | encode));  // ModRM byte with mod = 11; explicit cast matches aesdeclast and avoids implicit narrowing of a value > 127
1310 }
1311 
1312 void Assembler::aesdeclast(XMMRegister dst, Address src) {  // AESDECLAST, memory-source form: 66-class SIMD prefix, 0F 38 opcode map, opcode 0xDF.
1313   assert(VM_Version::supports_aes(), "");
1314   InstructionMark im(this);
1315   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1316   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1317   emit_int8((unsigned char)0xDF);
1318   emit_operand(dst, src);
1319 }
1320 
1321 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {  // AESDECLAST, register form: 66-class SIMD prefix, 0F 38 opcode map, opcode 0xDF.
1322   assert(VM_Version::supports_aes(), "");
1323   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1324   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1325   emit_int8((unsigned char)0xDF);
1326   emit_int8((unsigned char)(0xC0 | encode));  // ModRM byte with mod = 11 (register-direct)
1327 }
1328 
1329 void Assembler::aesenc(XMMRegister dst, Address src) {  // AESENC, memory-source form: 66-class SIMD prefix, 0F 38 opcode map, opcode 0xDC.
1330   assert(VM_Version::supports_aes(), "");
1331   InstructionMark im(this);
1332   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1333   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1334   emit_int8((unsigned char)0xDC);
1335   emit_operand(dst, src);
1336 }
1337 
1338 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {  // AESENC, register form: 66-class SIMD prefix, 0F 38 opcode map, opcode 0xDC.
1339   assert(VM_Version::supports_aes(), "");
1340   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1341   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1342   emit_int8((unsigned char)0xDC);
1343   emit_int8((unsigned char)(0xC0 | encode));  // ModRM byte with mod = 11; explicit cast matches aesenclast and avoids implicit narrowing of a value > 127
1344 }
1345 
1346 void Assembler::aesenclast(XMMRegister dst, Address src) {  // AESENCLAST, memory-source form: 66-class SIMD prefix, 0F 38 opcode map, opcode 0xDD.
1347   assert(VM_Version::supports_aes(), "");
1348   InstructionMark im(this);
1349   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1350   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1351   emit_int8((unsigned char)0xDD);
1352   emit_operand(dst, src);
1353 }
1354 
1355 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {  // AESENCLAST, register form: 66-class SIMD prefix, 0F 38 opcode map, opcode 0xDD.
1356   assert(VM_Version::supports_aes(), "");
1357   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1358   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1359   emit_int8((unsigned char)0xDD);
1360   emit_int8((unsigned char)(0xC0 | encode));  // ModRM byte with mod = 11 (register-direct)
1361 }
1362 
1363 void Assembler::andl(Address dst, int32_t imm32) {  // AND with a 32-bit memory destination and a 32-bit immediate (opcode 0x81).
1364   InstructionMark im(this);
1365   prefix(dst);
1366   emit_int8((unsigned char)0x81);
1367   emit_operand(rsp, dst, 4);  // rsp fills the register field (opcode extension); 4 is the rip_relative_correction, presumably for the imm32 that follows
1368   emit_int32(imm32);
1369 }
1370 
1371 void Assembler::andl(Register dst, int32_t imm32) {  // AND with a 32-bit register destination and an immediate source.
1372   prefix(dst);
1373   emit_arith(0x81, 0xE0, dst, imm32);  // opcode 0x81 with 0xE0 as the base of the second (register-form) byte
1374 }
1375 
1376 void Assembler::andl(Register dst, Address src) {  // AND with a 32-bit register destination and a memory source (opcode 0x23).
1377   InstructionMark im(this);
1378   prefix(src, dst);
1379   emit_int8(0x23);
1380   emit_operand(dst, src);
1381 }
1382 
1383 void Assembler::andl(Register dst, Register src) {  // AND with 32-bit register destination and register source (opcode 0x23).
1384   (void) prefix_and_encode(dst->encoding(), src->encoding());  // called only for its prefix side effect; the returned encoding is discarded
1385   emit_arith(0x23, 0xC0, dst, src);
1386 }
1387 
1388 void Assembler::andnl(Register dst, Register src1, Register src2) {  // BMI1 ANDN, register form: VEX-encoded, 0F 38 opcode map, opcode 0xF2.
1389   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1390   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1391   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1392   emit_int8((unsigned char)0xF2);
1393   emit_int8((unsigned char)(0xC0 | encode));  // ModRM byte with mod = 11 (register-direct)
1394 }
1395 
1396 void Assembler::andnl(Register dst, Register src1, Address src2) {  // BMI1 ANDN, memory-source form: VEX-encoded, 0F 38 opcode map, opcode 0xF2.
1397   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1398   InstructionMark im(this);
1399   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1400   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1401   emit_int8((unsigned char)0xF2);
1402   emit_operand(dst, src2);
1403 }
1404 
1405 void Assembler::bsfl(Register dst, Register src) {  // BSF (bit scan forward), 32-bit register form: 0F BC followed by a register-direct ModRM byte.
1406   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1407   emit_int8(0x0F);
1408   emit_int8((unsigned char)0xBC);
1409   emit_int8((unsigned char)(0xC0 | encode));
1410 }
1411 
1412 void Assembler::bsrl(Register dst, Register src) {  // BSR (bit scan reverse), 32-bit register form: 0F BD followed by a register-direct ModRM byte.
1413   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1414   emit_int8(0x0F);
1415   emit_int8((unsigned char)0xBD);
1416   emit_int8((unsigned char)(0xC0 | encode));
1417 }
1418 
1419 void Assembler::bswapl(Register reg) { // bswap: 0F followed by 0xC8 with the register encoding OR-ed into the low bits
1420   int encode = prefix_and_encode(reg->encoding());
1421   emit_int8(0x0F);
1422   emit_int8((unsigned char)(0xC8 | encode));
1423 }
1424 
1425 void Assembler::blsil(Register dst, Register src) {  // BMI1 BLSI, register form: VEX-encoded, 0F 38 opcode map, opcode 0xF3.
1426   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1427   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1428   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);  // rbx's encoding fills the register field as the opcode extension; dst goes in the second (VEX) operand slot
1429   emit_int8((unsigned char)0xF3);
1430   emit_int8((unsigned char)(0xC0 | encode));
1431 }
1432 
1433 void Assembler::blsil(Register dst, Address src) {  // BMI1 BLSI, memory-source form: VEX-encoded, 0F 38 opcode map, opcode 0xF3.
1434   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1435   InstructionMark im(this);
1436   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1437   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1438   emit_int8((unsigned char)0xF3);
1439   emit_operand(rbx, src);  // rbx fills the register field as the opcode extension, not as a real operand
1440 }
1441 
1442 void Assembler::blsmskl(Register dst, Register src) {  // BMI1 BLSMSK, register form: VEX-encoded, 0F 38 opcode map, opcode 0xF3.
1443   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1444   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1445   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);  // rdx's encoding fills the register field as the opcode extension; dst goes in the second (VEX) operand slot
1446   emit_int8((unsigned char)0xF3);
1447   emit_int8((unsigned char)(0xC0 | encode));
1448 }
1449 
1450 void Assembler::blsmskl(Register dst, Address src) {  // BMI1 BLSMSK, memory-source form: VEX-encoded, 0F 38 opcode map, opcode 0xF3.
1451   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1452   InstructionMark im(this);
1453   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1454   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1455   emit_int8((unsigned char)0xF3);
1456   emit_operand(rdx, src);  // rdx fills the register field as the opcode extension, not as a real operand
1457 }
1458 
1459 void Assembler::blsrl(Register dst, Register src) {  // BMI1 BLSR, register form: VEX-encoded, 0F 38 opcode map, opcode 0xF3.
1460   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1461   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1462   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);  // rcx's encoding fills the register field as the opcode extension; dst goes in the second (VEX) operand slot
1463   emit_int8((unsigned char)0xF3);
1464   emit_int8((unsigned char)(0xC0 | encode));
1465 }
1466 
1467 void Assembler::blsrl(Register dst, Address src) {  // BMI1 BLSR, memory-source form: VEX-encoded, 0F 38 opcode map, opcode 0xF3.
1468   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1469   InstructionMark im(this);
1470   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1471   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1472   emit_int8((unsigned char)0xF3);
1473   emit_operand(rcx, src);  // rcx fills the register field as the opcode extension, not as a real operand
1474 }
1475 
1476 void Assembler::call(Label& L, relocInfo::relocType rtype) {  // Emits CALL rel32 (opcode 0xE8) to label L; an unbound label is registered for later patching.
1477   // suspect disp32 is always good
1478   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);  // operand format handed to emit_data: disp32 on 64-bit builds, imm on 32-bit builds
1479 
1480   if (L.is_bound()) {
1481     const int long_size = 5;  // size of the call instruction: 1 opcode byte + 4 displacement bytes
1482     int offs = (int)( target(L) - pc() );  // distance from the start of this instruction to the target
1483     assert(offs <= 0, "assembler error");  // a bound label cannot lie ahead of the current pc
1484     InstructionMark im(this);
1485     // 1110 1000 #32-bit disp
1486     emit_int8((unsigned char)0xE8);
1487     emit_data(offs - long_size, rtype, operand);  // displacement is adjusted by the instruction length
1488   } else {
1489     InstructionMark im(this);
1490     // 1110 1000 #32-bit disp
1491     L.add_patch_at(code(), locator());  // record this location on the label before emitting
1492 
1493     emit_int8((unsigned char)0xE8);
1494     emit_data(int(0), rtype, operand);  // placeholder displacement of zero
1495   }
1496 }
1497 
1498 void Assembler::call(Register dst) {  // Indirect CALL through a register: 0xFF followed by 0xD0 | register encoding.
1499   int encode = prefix_and_encode(dst->encoding());
1500   emit_int8((unsigned char)0xFF);
1501   emit_int8((unsigned char)(0xD0 | encode));
1502 }
1503 
1504 
1505 void Assembler::call(Address adr) {  // Indirect CALL through a memory operand (opcode 0xFF).
1506   InstructionMark im(this);
1507   prefix(adr);
1508   emit_int8((unsigned char)0xFF);
1509   emit_operand(rdx, adr);  // rdx fills the register field as the opcode extension, not as a real operand
1510 }
1511 
1512 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {  // Emits CALL rel32 (opcode 0xE8) to the absolute address entry, with relocation rspec.
1513   assert(entry != NULL, "call most probably wrong");
1514   InstructionMark im(this);
1515   emit_int8((unsigned char)0xE8);
1516   intptr_t disp = entry - (pc() + sizeof(int32_t));  // pc() is now past the opcode; adding 4 gives the end of the instruction
1517   assert(is_simm32(disp), "must be 32bit offset (call2)");
1518   // Technically, should use call32_operand, but this format is
1519   // implied by the fact that we're emitting a call instruction.
1520 
1521   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1522   emit_data((int) disp, rspec, operand);
1523 }
1524 
1525 void Assembler::cdql() {  // Emits the single-byte opcode 0x99 (CDQ).
1526   emit_int8((unsigned char)0x99);
1527 }
1528 
1529 void Assembler::cld() {  // Emits the single-byte opcode 0xFC (CLD).
1530   emit_int8((unsigned char)0xFC);
1531 }
1532 
1533 void Assembler::cmovl(Condition cc, Register dst, Register src) {  // CMOVcc, 32-bit register form: 0F, then 0x40 | cc, then a register-direct ModRM byte.
1534   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));  // on 32-bit builds CMOV support is checked in product builds too (guarantee, not assert)
1535   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1536   emit_int8(0x0F);
1537   emit_int8(0x40 | cc);  // condition code is OR-ed into the second opcode byte
1538   emit_int8((unsigned char)(0xC0 | encode));
1539 }
1540 
1541 
1542 void Assembler::cmovl(Condition cc, Register dst, Address src) {  // CMOVcc, 32-bit memory-source form: 0F, then 0x40 | cc, then the memory operand.
1543   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));  // on 32-bit builds CMOV support is checked in product builds too (guarantee, not assert)
       InstructionMark im(this);  // mark the instruction start, as every other Address-form emitter here does; check_relocation() reads it via inst_mark()
1544   prefix(src, dst);
1545   emit_int8(0x0F);
1546   emit_int8(0x40 | cc);  // condition code is OR-ed into the second opcode byte
1547   emit_operand(dst, src);
1548 }
1549 
1550 void Assembler::cmpb(Address dst, int imm8) {  // CMP of a byte memory operand against an 8-bit immediate (opcode 0x80).
1551   InstructionMark im(this);
1552   prefix(dst);
1553   emit_int8((unsigned char)0x80);
1554   emit_operand(rdi, dst, 1);  // rdi fills the register field (opcode extension); 1 is the rip_relative_correction, presumably for the imm8 that follows
1555   emit_int8(imm8);
1556 }
1557 
1558 void Assembler::cmpl(Address dst, int32_t imm32) {  // CMP of a 32-bit memory operand against a 32-bit immediate (opcode 0x81).
1559   InstructionMark im(this);
1560   prefix(dst);
1561   emit_int8((unsigned char)0x81);
1562   emit_operand(rdi, dst, 4);  // rdi fills the register field (opcode extension); 4 is the rip_relative_correction, presumably for the imm32 that follows
1563   emit_int32(imm32);
1564 }
1565 
1566 void Assembler::cmpl(Register dst, int32_t imm32) {  // CMP of a 32-bit register against an immediate.
1567   prefix(dst);
1568   emit_arith(0x81, 0xF8, dst, imm32);  // opcode 0x81 with 0xF8 as the base of the second (register-form) byte
1569 }
1570 
1571 void Assembler::cmpl(Register dst, Register src) {  // CMP of two 32-bit registers (opcode 0x3B).
1572   (void) prefix_and_encode(dst->encoding(), src->encoding());  // called only for its prefix side effect; the returned encoding is discarded
1573   emit_arith(0x3B, 0xC0, dst, src);
1574 }
1575 
1576 void Assembler::cmpl(Register dst, Address  src) {  // CMP of a 32-bit register against a memory operand (opcode 0x3B).
1577   InstructionMark im(this);
1578   prefix(src, dst);
1579   emit_int8((unsigned char)0x3B);
1580   emit_operand(dst, src);
1581 }
1582 
1583 void Assembler::cmpw(Address dst, int imm16) {  // CMP of a 16-bit memory operand against a 16-bit immediate (0x66 prefix, opcode 0x81).
1584   InstructionMark im(this);
1585   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");  // no prefix(dst) call is made here, so extended registers cannot be encoded
1586   emit_int8(0x66);
1587   emit_int8((unsigned char)0x81);
1588   emit_operand(rdi, dst, 2);  // rdi fills the register field (opcode extension); 2 is the rip_relative_correction, presumably for the imm16 that follows
1589   emit_int16(imm16);
1590 }
1591 
1592 // The 32-bit cmpxchg compares the value at adr with the contents of rax and,
1593 // if they are equal, stores reg into adr; otherwise the value at adr is loaded into rax.
1594 // The ZF is set if the compared values were equal, and cleared otherwise.
1595 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg: opcode bytes 0F B1
1596   InstructionMark im(this);
1597   prefix(adr, reg);
1598   emit_int8(0x0F);
1599   emit_int8((unsigned char)0xB1);
1600   emit_operand(reg, adr);
1601 }
1602 
1603 // The 8-bit cmpxchg compares the value at adr with the contents of rax and,
1604 // if they are equal, stores reg into adr; otherwise the value at adr is loaded into rax.
1605 // The ZF is set if the compared values were equal, and cleared otherwise.
1606 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg: opcode bytes 0F B0
1607   InstructionMark im(this);
1608   prefix(adr, reg, true);  // third argument differs from cmpxchgl; presumably flags a byte-register operand — confirm against prefix()
1609   emit_int8(0x0F);
1610   emit_int8((unsigned char)0xB0);
1611   emit_operand(reg, adr);
1612 }
1613 
1614 void Assembler::comisd(XMMRegister dst, Address src) {  // COMISD, memory-source form: 66-class SIMD prefix, 0F opcode map, opcode 0x2F.
1615   // NOTE: dbx seems to decode this as comiss even though the
1616   // 0x66 is there. Strangely ucomisd comes out correct
1617   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1618   InstructionMark im(this);
1619   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1620   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1621   attributes.set_rex_vex_w_reverted();
1622   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);  // xnoreg: no second source register for a compare
1623   emit_int8(0x2F);
1624   emit_operand(dst, src);
1625 }
1626 
1627 void Assembler::comisd(XMMRegister dst, XMMRegister src) {  // COMISD, register form: 66-class SIMD prefix, 0F opcode map, opcode 0x2F.
1628   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1629   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1630   attributes.set_rex_vex_w_reverted();
1631   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);  // xnoreg: no second source register for a compare
1632   emit_int8(0x2F);
1633   emit_int8((unsigned char)(0xC0 | encode));
1634 }
1635 
1636 void Assembler::comiss(XMMRegister dst, Address src) {  // COMISS, memory-source form: no SIMD prefix, 0F opcode map, opcode 0x2F.
1637   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1638   InstructionMark im(this);
1639   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1640   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1641   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);  // xnoreg: no second source register for a compare
1642   emit_int8(0x2F);
1643   emit_operand(dst, src);
1644 }
1645 
1646 void Assembler::comiss(XMMRegister dst, XMMRegister src) {  // COMISS, register form: no SIMD prefix, 0F opcode map, opcode 0x2F.
1647   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1648   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1649   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);  // xnoreg: no second source register for a compare
1650   emit_int8(0x2F);
1651   emit_int8((unsigned char)(0xC0 | encode));
1652 }
1653 
1654 void Assembler::cpuid() {  // Emits the two-byte opcode 0F A2 (CPUID).
1655   emit_int8(0x0F);
1656   emit_int8((unsigned char)0xA2);
1657 }
1658 
1659 // Opcode / Instruction                      Op /  En  64 - Bit Mode     Compat / Leg Mode Description                  Implemented
1660 // F2 0F 38 F0 / r       CRC32 r32, r / m8   RM        Valid             Valid             Accumulate CRC32 on r / m8.  v
1661 // F2 REX 0F 38 F0 / r   CRC32 r32, r / m8*  RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1662 // F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8   RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1663 //
1664 // F2 0F 38 F1 / r       CRC32 r32, r / m16  RM        Valid             Valid             Accumulate CRC32 on r / m16. v
1665 //
1666 // F2 0F 38 F1 / r       CRC32 r32, r / m32  RM        Valid             Valid             Accumulate CRC32 on r / m32. v
1667 //
1668 // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64  RM        Valid             N.E.              Accumulate CRC32 on r / m64. v
1669 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {  // CRC32 crc, v (register source); sizeInBytes selects the source width: 1, 2, 4, or (64-bit builds only) 8.
1670   assert(VM_Version::supports_sse4_2(), "");
1671   int8_t w = 0x01;  // low bit of the final opcode byte: F1 by default, cleared to give F0 for the 1-byte form
1672   Prefix p = Prefix_EMPTY;
1673 
1674   emit_int8((int8_t)0xF2);  // mandatory F2 prefix, emitted before any REX prefix
1675   switch (sizeInBytes) {
1676   case 1:
1677     w = 0;
1678     break;
1679   case 2:
1680   case 4:
1681     break;
1682   LP64_ONLY(case 8:)
1683     // This instruction is not valid in 32 bits
1684     // Note:
1685     // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1686     //
1687     // Page B - 72   Vol. 2C says
1688     // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
1689     // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
1690     //                                                                            F0!!!
1691     // while 3 - 208 Vol. 2A
1692     // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.Accumulate CRC32 on r / m64.
1693     //
1694     // the 0 on a last bit is reserved for a different flavor of this instruction :
1695     // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.Accumulate CRC32 on r / m8.
1696     p = REX_W;
1697     break;
1698   default:
1699     assert(0, "Unsupported value for a sizeInBytes argument");
1700     break;
1701   }
1702   LP64_ONLY(prefix(crc, v, p);)  // a REX prefix is only emitted on 64-bit builds
1703   emit_int8((int8_t)0x0F);
1704   emit_int8(0x38);
1705   emit_int8((int8_t)(0xF0 | w));
1706   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));  // ModRM: mod = 11, reg = low 3 bits of crc, r/m = low 3 bits of v
1707 }
1708 
// Memory-source CRC32. Byte sequence produced:
//   F2, [prefix - 64-bit builds only], 0F 38, F0|w, then the operand bytes
//   written by emit_operand(crc, adr).
//   crc         - accumulator register
//   adr         - memory operand supplying the data
//   sizeInBytes - 1, 2 or 4 (8 is accepted only when LP64_ONLY expands); any
//                 other value trips the assert in the default branch
1709 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1710   assert(VM_Version::supports_sse4_2(), "");
1711   InstructionMark im(this);
1712   int8_t w = 0x01;  // low opcode bit: 1 -> F1; cleared below for the byte form (F0)
1713   Prefix p = Prefix_EMPTY;
1714 
1715   emit_int8((int8_t)0xF2);
1716   switch (sizeInBytes) {
1717   case 1:
1718     w = 0;
1719     break;
1720   case 2:  // NOTE(review): sizes 2 and 4 take the identical path and no operand-size
1721   case 4:  // prefix is emitted in this function - confirm the 16-bit form is intended.
1722     break;
1723   LP64_ONLY(case 8:)
1724     // This instruction is not valid in 32 bits
1725     p = REX_W;
1726     break;
1727   default:
1728     assert(0, "Unsupported value for a sizeInBytes argument");
1729     break;
1730   }
1731   LP64_ONLY(prefix(crc, adr, p);)  // the prefix must come after F2 and before the 0F escape
1732   emit_int8((int8_t)0x0F);
1733   emit_int8(0x38);
1734   emit_int8((int8_t)(0xF0 | w));
1735   emit_operand(crc, adr);
1736 }
1737 
// CVTDQ2PD dst, src (register form): F3 SIMD prefix, 0F opcode map, opcode E6,
// then a register-direct ModRM byte (0xC0 | encode). Attributes request a
// 128-bit vector length with uses_vl set. The SSE2 assert is wrapped in NOT_LP64.
1738 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1739   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1740   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1741   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1742   emit_int8((unsigned char)0xE6);
1743   emit_int8((unsigned char)(0xC0 | encode));
1744 }
1745 
// CVTDQ2PS dst, src (register form): no SIMD prefix, 0F opcode map, opcode 5B,
// then a register-direct ModRM byte. Attributes request a 128-bit vector
// length with uses_vl set. The SSE2 assert is wrapped in NOT_LP64.
1746 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1747   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1748   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1749   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1750   emit_int8(0x5B);
1751   emit_int8((unsigned char)(0xC0 | encode));
1752 }
1753 
// CVTSD2SS dst, src (register form): F2 SIMD prefix, 0F opcode map, opcode 5A,
// then a register-direct ModRM byte. dst is also passed as the second (nds)
// argument of the prefix helper. rex_w is requested only when EVEX is
// supported, and set_rex_vex_w_reverted() is applied before the prefix is built.
1754 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1755   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1756   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1757   attributes.set_rex_vex_w_reverted();
1758   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1759   emit_int8(0x5A);
1760   emit_int8((unsigned char)(0xC0 | encode));
1761 }
1762 
// CVTSD2SS dst, [src] (memory form): F2 SIMD prefix, 0F opcode map, opcode 5A,
// then the operand bytes from emit_operand(dst, src). The address attributes
// declare an EVEX_T1S tuple with a 64-bit input. rex_w is requested only when
// EVEX is supported, and set_rex_vex_w_reverted() is applied.
1763 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1764   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1765   InstructionMark im(this);
1766   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1767   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1768   attributes.set_rex_vex_w_reverted();
1769   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1770   emit_int8(0x5A);
1771   emit_operand(dst, src);
1772 }
1773 
// CVTSI2SD dst, src with a general-purpose source register (rex_w = false):
// F2 SIMD prefix, 0F opcode map, opcode 2A, register-direct ModRM.
// The general register is re-wrapped with as_XMMRegister(src->encoding()) only
// so that it can be passed to the XMM-typed prefix helper.
1774 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1775   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1776   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1777   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1778   emit_int8(0x2A);
1779   emit_int8((unsigned char)(0xC0 | encode));
1780 }
1781 
// CVTSI2SD dst, [src] (memory form, rex_w = false): F2 SIMD prefix, 0F opcode
// map, opcode 2A, then the operand bytes from emit_operand(dst, src).
// The address attributes declare an EVEX_T1S tuple with a 32-bit input.
1782 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1783   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1784   InstructionMark im(this);
1785   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1786   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1787   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1788   emit_int8(0x2A);
1789   emit_operand(dst, src);
1790 }
1791 
// CVTSI2SS dst, src with a general-purpose source register (rex_w = false):
// F3 SIMD prefix, 0F opcode map, opcode 2A, register-direct ModRM.
// The general register is re-wrapped with as_XMMRegister(src->encoding()) only
// so that it can be passed to the XMM-typed prefix helper.
1792 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1793   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1794   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1795   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1796   emit_int8(0x2A);
1797   emit_int8((unsigned char)(0xC0 | encode));
1798 }
1799 
// CVTSI2SS dst, [src] (memory form, rex_w = false): F3 SIMD prefix, 0F opcode
// map, opcode 2A, then the operand bytes from emit_operand(dst, src).
// The address attributes declare an EVEX_T1S tuple with a 32-bit input.
1800 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1801   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1802   InstructionMark im(this);
1803   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1804   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1805   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1806   emit_int8(0x2A);
1807   emit_operand(dst, src);
1808 }
1809 
// CVTSI2SS dst, src with rex_w = true. This is the only difference from
// cvtsi2ssl(XMMRegister, Register); the F3 prefix, 0F map, opcode 2A and
// register-direct ModRM are the same.
1810 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1811   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1812   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1813   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1814   emit_int8(0x2A);
1815   emit_int8((unsigned char)(0xC0 | encode));
1816 }
1817 
// CVTSS2SD dst, src (register form): F3 SIMD prefix, 0F opcode map, opcode 5A,
// register-direct ModRM. dst is also passed as the nds argument of the helper.
1818 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1819   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1820   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1821   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1822   emit_int8(0x5A);
1823   emit_int8((unsigned char)(0xC0 | encode));
1824 }
1825 
// CVTSS2SD dst, [src] (memory form): F3 SIMD prefix, 0F opcode map, opcode 5A,
// then the operand bytes from emit_operand(dst, src). The address attributes
// declare an EVEX_T1S tuple with a 32-bit input.
1826 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1827   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1828   InstructionMark im(this);
1829   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1830   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1831   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1832   emit_int8(0x5A);
1833   emit_operand(dst, src);
1834 }
1835 
1836 
// CVTTSD2SI dst, src with a general-purpose destination (rex_w = false):
// F2 SIMD prefix, 0F opcode map, opcode 2C, register-direct ModRM.
// The destination is re-wrapped with as_XMMRegister(dst->encoding()) only so
// that it can be passed to the XMM-typed prefix helper.
1837 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1838   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1839   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1840   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1841   emit_int8(0x2C);
1842   emit_int8((unsigned char)(0xC0 | encode));
1843 }
1844 
// CVTTSS2SI dst, src with a general-purpose destination (rex_w = false):
// F3 SIMD prefix, 0F opcode map, opcode 2C, register-direct ModRM.
// The destination is re-wrapped with as_XMMRegister(dst->encoding()) only so
// that it can be passed to the XMM-typed prefix helper.
1845 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1846   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1847   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1848   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1849   emit_int8(0x2C);
1850   emit_int8((unsigned char)(0xC0 | encode));
1851 }
1852 
// CVTTPD2DQ dst, src (register form): 66 SIMD prefix, 0F opcode map, opcode E6,
// register-direct ModRM. The vector length is 512-bit when
// VM_Version::supports_avx512novl() is true and 128-bit otherwise.
// rex_w and uses_vl are both set.
1853 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1854   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1855   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1856   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1857   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1858   emit_int8((unsigned char)0xE6);
1859   emit_int8((unsigned char)(0xC0 | encode));
1860 }
1861 
// Decrement of a memory operand: address prefix, opcode FF, then the operand
// bytes. rcx is not an operand here; it is passed to emit_operand only so that
// its register number lands in the ModRM reg field, which selects the
// sub-operation within the FF opcode group.
1862 void Assembler::decl(Address dst) {
1863   // Don't use it directly. Use MacroAssembler::decrement() instead.
1864   InstructionMark im(this);
1865   prefix(dst);
1866   emit_int8((unsigned char)0xFF);
1867   emit_operand(rcx, dst);
1868 }
1869 
// DIVSD dst, [src] (memory form): F2 SIMD prefix, 0F opcode map, opcode 5E,
// then the operand bytes from emit_operand(dst, src). The address attributes
// declare an EVEX_T1S tuple with a 64-bit input. rex_w is requested only when
// EVEX is supported, and set_rex_vex_w_reverted() is applied.
1870 void Assembler::divsd(XMMRegister dst, Address src) {
1871   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1872   InstructionMark im(this);
1873   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1874   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1875   attributes.set_rex_vex_w_reverted();
1876   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1877   emit_int8(0x5E);
1878   emit_operand(dst, src);
1879 }
1880 
// DIVSD dst, src (register form): F2 SIMD prefix, 0F opcode map, opcode 5E,
// register-direct ModRM. rex_w is requested only when EVEX is supported, and
// set_rex_vex_w_reverted() is applied before the prefix is built.
1881 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1882   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1883   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1884   attributes.set_rex_vex_w_reverted();
1885   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1886   emit_int8(0x5E);
1887   emit_int8((unsigned char)(0xC0 | encode));
1888 }
1889 
// DIVSS dst, [src] (memory form): F3 SIMD prefix, 0F opcode map, opcode 5E,
// then the operand bytes from emit_operand(dst, src). The address attributes
// declare an EVEX_T1S tuple with a 32-bit input.
1890 void Assembler::divss(XMMRegister dst, Address src) {
1891   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1892   InstructionMark im(this);
1893   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1894   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1895   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1896   emit_int8(0x5E);
1897   emit_operand(dst, src);
1898 }
1899 
// DIVSS dst, src (register form): F3 SIMD prefix, 0F opcode map, opcode 5E,
// register-direct ModRM.
1900 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1901   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1902   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1903   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1904   emit_int8(0x5E);
1905   emit_int8((unsigned char)(0xC0 | encode));
1906 }
1907 
// Emits the two-byte opcode 0F 77 (EMMS). The MMX capability assert is
// wrapped in NOT_LP64.
1908 void Assembler::emms() {
1909   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1910   emit_int8(0x0F);
1911   emit_int8(0x77);
1912 }
1913 
// Emits the single-byte opcode F4 (HLT).
1914 void Assembler::hlt() {
1915   emit_int8((unsigned char)0xF4);
1916 }
1917 
// Signed divide by a register: opcode F7 followed by a register-direct ModRM
// byte. 0xF8 is 0xC0 | (7 << 3), so the ModRM reg field carries sub-opcode 7
// and `encode` supplies the r/m register bits.
1918 void Assembler::idivl(Register src) {
1919   int encode = prefix_and_encode(src->encoding());
1920   emit_int8((unsigned char)0xF7);
1921   emit_int8((unsigned char)(0xF8 | encode));
1922 }
1923 
// Unsigned divide by a register: opcode F7 followed by a register-direct ModRM
// byte. 0xF0 is 0xC0 | (6 << 3), so the ModRM reg field carries sub-opcode 6
// and `encode` supplies the r/m register bits.
1924 void Assembler::divl(Register src) { // Unsigned
1925   int encode = prefix_and_encode(src->encoding());
1926   emit_int8((unsigned char)0xF7);
1927   emit_int8((unsigned char)(0xF0 | encode));
1928 }
1929 
// One-operand IMUL by a register: opcode F7 followed by a register-direct
// ModRM byte. 0xE8 is 0xC0 | (5 << 3), so the ModRM reg field carries
// sub-opcode 5 and `encode` supplies the r/m register bits.
1930 void Assembler::imull(Register src) {
1931   int encode = prefix_and_encode(src->encoding());
1932   emit_int8((unsigned char)0xF7);
1933   emit_int8((unsigned char)(0xE8 | encode));
1934 }
1935 
// Two-operand IMUL dst, src: any prefix produced by prefix_and_encode, the
// two-byte opcode 0F AF, then a register-direct ModRM byte.
1936 void Assembler::imull(Register dst, Register src) {
1937   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1938   emit_int8(0x0F);
1939   emit_int8((unsigned char)0xAF);
1940   emit_int8((unsigned char)(0xC0 | encode));
1941 }
1942 
1943 
// Three-operand IMUL dst, src, value. Picks the shorter encoding when the
// immediate passes is8bit():
//   6B ModRM ib  - one-byte immediate
//   69 ModRM id  - four-byte immediate
// In both forms the ModRM byte is register-direct (0xC0 | encode).
1944 void Assembler::imull(Register dst, Register src, int value) {
1945   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1946   if (is8bit(value)) {
1947     emit_int8(0x6B);
1948     emit_int8((unsigned char)(0xC0 | encode));
1949     emit_int8(value & 0xFF);
1950   } else {
1951     emit_int8(0x69);
1952     emit_int8((unsigned char)(0xC0 | encode));
1953     emit_int32(value);
1954   }
1955 }
1956 
// Two-operand IMUL dst, [src] (memory form): prefix(src, dst), the two-byte
// opcode 0F AF, then the operand bytes from emit_operand(dst, src).
1957 void Assembler::imull(Register dst, Address src) {
1958   InstructionMark im(this);
1959   prefix(src, dst);
1960   emit_int8(0x0F);
1961   emit_int8((unsigned char) 0xAF);
1962   emit_operand(dst, src);
1963 }
1964 
1965 
// Increment of a memory operand: address prefix, opcode FF, then the operand
// bytes. rax is not an operand here; it is passed to emit_operand only so that
// its register number lands in the ModRM reg field, which selects the
// sub-operation within the FF opcode group.
1966 void Assembler::incl(Address dst) {
1967   // Don't use it directly. Use MacroAssembler::increment() instead.
1968   InstructionMark im(this);
1969   prefix(dst);
1970   emit_int8((unsigned char)0xFF);
1971   emit_operand(rax, dst);
1972 }
1973 
// Conditional jump to label L.
//   cc          - condition code; asserted to be in [0, 16)
//   L           - target label, bound or not yet bound
//   maybe_short - permits the 2-byte form when the label is bound and the
//                 displacement fits in 8 bits
// Bound label: emits either 70|cc disp8 (2 bytes) or 0F 80|cc disp32 (6 bytes).
// Unbound label: always emits the 6-byte form with a zero displacement and
// records a patch site on the label.
1974 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1975   InstructionMark im(this);
1976   assert((0 <= cc) && (cc < 16), "illegal cc");
1977   if (L.is_bound()) {
1978     address dst = target(L);
1979     assert(dst != NULL, "jcc most probably wrong");
1980 
1981     const int short_size = 2;
1982     const int long_size = 6;
1983     intptr_t offs = (intptr_t)dst - (intptr_t)pc();  // distance measured from the start of this instruction
1984     if (maybe_short && is8bit(offs - short_size)) {
1985       // 0111 tttn #8-bit disp
1986       emit_int8(0x70 | cc);
1987       emit_int8((offs - short_size) & 0xFF);  // the instruction's own length is subtracted from the displacement
1988     } else {
1989       // 0000 1111 1000 tttn #32-bit disp
1990       assert(is_simm32(offs - long_size),
1991              "must be 32bit offset (call4)");
1992       emit_int8(0x0F);
1993       emit_int8((unsigned char)(0x80 | cc));
1994       emit_int32(offs - long_size);
1995     }
1996   } else {
1997     // Note: could eliminate cond. jumps to this jump if condition
1998     //       is the same however, seems to be rather unlikely case.
1999     // Note: use jccb() if label to be bound is very close to get
2000     //       an 8-bit displacement
2001     L.add_patch_at(code(), locator());
2002     emit_int8(0x0F);
2003     emit_int8((unsigned char)(0x80 | cc));
2004     emit_int32(0);  // placeholder displacement
2005   }
2006 }
2007 
// Conditional jump that always uses the 2-byte form (70|cc disp8).
// Bound label: the displacement is computed immediately and truncated to
// 8 bits. In ASSERT builds the distance, widened by short_branch_delta(), is
// checked with is8bit() first.
// Unbound label: records a patch site and emits a zero displacement byte.
2008 void Assembler::jccb(Condition cc, Label& L) {
2009   if (L.is_bound()) {
2010     const int short_size = 2;
2011     address entry = target(L);
2012 #ifdef ASSERT
2013     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2014     intptr_t delta = short_branch_delta();
2015     if (delta != 0) {
2016       dist += (dist < 0 ? (-delta) :delta);  // push the distance further from zero by delta
2017     }
2018     assert(is8bit(dist), "Dispacement too large for a short jmp");
2019 #endif
2020     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2021     // 0111 tttn #8-bit disp
2022     emit_int8(0x70 | cc);
2023     emit_int8((offs - short_size) & 0xFF);
2024   } else {
2025     InstructionMark im(this);
2026     L.add_patch_at(code(), locator());
2027     emit_int8(0x70 | cc);
2028     emit_int8(0);  // placeholder displacement
2029   }
2030 }
2031 
// Conditional jump that uses jccb(cc, L), except in ASSERT builds with
// UseShenandoahGC set, where jcc(cc, L) is called instead.
// Without ASSERT the whole function body is the single jccb call.
2032 void Assembler::jccb_if_possible(Condition cc, Label& L) {
2033 
2034 #ifdef ASSERT
2035   if (UseShenandoahGC) {
2036     jcc(cc, L);
2037   } else
2038 #endif
2039     jccb(cc, L);
2040 }
2041 
// Indirect jump through a memory operand: address prefix, opcode FF, then the
// operand bytes. rsp is not an operand here; it is passed to emit_operand only
// so that its register number lands in the ModRM reg field, which selects the
// sub-operation within the FF opcode group.
2042 void Assembler::jmp(Address adr) {
2043   InstructionMark im(this);
2044   prefix(adr);
2045   emit_int8((unsigned char)0xFF);
2046   emit_operand(rsp, adr);
2047 }
2048 
// Unconditional jump to label L.
//   L           - target label, bound or not yet bound
//   maybe_short - permits the 2-byte form when the label is bound and the
//                 displacement fits in 8 bits
// Bound label: emits either EB disp8 (2 bytes) or E9 disp32 (5 bytes).
// Unbound label: always emits E9 with a zero displacement and records a patch
// site on the label.
2049 void Assembler::jmp(Label& L, bool maybe_short) {
2050   if (L.is_bound()) {
2051     address entry = target(L);
2052     assert(entry != NULL, "jmp most probably wrong");
2053     InstructionMark im(this);
2054     const int short_size = 2;
2055     const int long_size = 5;
2056     intptr_t offs = entry - pc();  // distance measured from the start of this instruction
2057     if (maybe_short && is8bit(offs - short_size)) {
2058       emit_int8((unsigned char)0xEB);
2059       emit_int8((offs - short_size) & 0xFF);
2060     } else {
2061       emit_int8((unsigned char)0xE9);
2062       emit_int32(offs - long_size);
2063     }
2064   } else {
2065     // By default, forward jumps are always 32-bit displacements, since
2066     // we can't yet know where the label will be bound.  If you're sure that
2067     // the forward jump will not run beyond 256 bytes, use jmpb to
2068     // force an 8-bit displacement.
2069     InstructionMark im(this);
2070     L.add_patch_at(code(), locator());
2071     emit_int8((unsigned char)0xE9);
2072     emit_int32(0);  // placeholder displacement
2073   }
2074 }
2075 
// Indirect jump through a register: opcode FF followed by a register-direct
// ModRM byte. 0xE0 is 0xC0 | (4 << 3), so the ModRM reg field carries
// sub-opcode 4 and `encode` supplies the r/m register bits.
2076 void Assembler::jmp(Register entry) {
2077   int encode = prefix_and_encode(entry->encoding());
2078   emit_int8((unsigned char)0xFF);
2079   emit_int8((unsigned char)(0xE0 | encode));
2080 }
2081 
// Jump to an absolute code address using the E9 disp32 form.
//   dest  - target address; asserted non-NULL
//   rspec - relocation holder; its reloc() is passed to emit_data together
//           with the call32_operand format
// The displacement is measured from the end of the 4-byte displacement field
// (pc() is taken after the opcode byte has been emitted) and is asserted to
// fit in a signed 32-bit value.
2082 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2083   InstructionMark im(this);
2084   emit_int8((unsigned char)0xE9);
2085   assert(dest != NULL, "must have a target");
2086   intptr_t disp = dest - (pc() + sizeof(int32_t));
2087   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2088   emit_data(disp, rspec.reloc(), call32_operand);
2089 }
2090 
// Unconditional jump that always uses the 2-byte form (EB disp8).
// Bound label: the displacement is computed immediately and truncated to
// 8 bits. In ASSERT builds the distance, widened by short_branch_delta(), is
// checked with is8bit() first.
// Unbound label: records a patch site and emits a zero displacement byte.
2091 void Assembler::jmpb(Label& L) {
2092   if (L.is_bound()) {
2093     const int short_size = 2;
2094     address entry = target(L);
2095     assert(entry != NULL, "jmp most probably wrong");
2096 #ifdef ASSERT
2097     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2098     intptr_t delta = short_branch_delta();
2099     if (delta != 0) {
2100       dist += (dist < 0 ? (-delta) :delta);  // push the distance further from zero by delta
2101     }
2102     assert(is8bit(dist), "Dispacement too large for a short jmp");
2103 #endif
2104     intptr_t offs = entry - pc();
2105     emit_int8((unsigned char)0xEB);
2106     emit_int8((offs - short_size) & 0xFF);
2107   } else {
2108     InstructionMark im(this);
2109     L.add_patch_at(code(), locator());
2110     emit_int8((unsigned char)0xEB);
2111     emit_int8(0);  // placeholder displacement
2112   }
2113 }
2114 
// Unconditional jump that uses jmpb(L), except in ASSERT builds with
// UseShenandoahGC set, where jmp(L) is called instead.
// Without ASSERT the whole function body is the single jmpb call.
2115 void Assembler::jmpb_if_possible(Label& L) {
2116 
2117 #ifdef ASSERT
2118   if (UseShenandoahGC) {
2119     jmp(L);
2120   } else
2121 #endif
2122     jmpb(L);
2123 }
2124 
// LDMXCSR [src]: address prefix, the two-byte opcode 0F AE, then the operand
// bytes. as_Register(2) is not an operand; it places the value 2 in the ModRM
// reg field as the sub-opcode within the 0F AE group.
2125 void Assembler::ldmxcsr( Address src) {
2126   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2127   InstructionMark im(this);
2128   prefix(src);
2129   emit_int8(0x0F);
2130   emit_int8((unsigned char)0xAE);
2131   emit_operand(as_Register(2), src);
2132 }
2133 
// LEA dst, [src]: opcode 8D followed by the operand bytes.
// Under _LP64 a 0x67 (addr32) byte and the prefix from prefix(src, dst) are
// emitted first; without _LP64 no prefix bytes are produced at all.
2134 void Assembler::leal(Register dst, Address src) {
2135   InstructionMark im(this);
2136 #ifdef _LP64
2137   emit_int8(0x67); // addr32
2138   prefix(src, dst);
2139 #endif // LP64
2140   emit_int8((unsigned char)0x8D);
2141   emit_operand(dst, src);
2142 }
2143 
// Emits the fixed three-byte sequence 0F AE E8 (LFENCE).
2144 void Assembler::lfence() {
2145   emit_int8(0x0F);
2146   emit_int8((unsigned char)0xAE);
2147   emit_int8((unsigned char)0xE8);
2148 }
2149 
// Emits the single prefix byte F0 (LOCK). The caller is expected to emit the
// instruction it applies to immediately afterwards.
2150 void Assembler::lock() {
2151   emit_int8((unsigned char)0xF0);
2152 }
2153 
// LZCNT dst, src (register form): F3, any prefix from prefix_and_encode, the
// two-byte opcode 0F BD, then a register-direct ModRM byte.
// The assert message records why the capability check matters: without LZCNT
// support the same bytes are treated as BSR.
2154 void Assembler::lzcntl(Register dst, Register src) {
2155   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2156   emit_int8((unsigned char)0xF3);  // F3 must precede the prefix produced on the next line
2157   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2158   emit_int8(0x0F);
2159   emit_int8((unsigned char)0xBD);
2160   emit_int8((unsigned char)(0xC0 | encode));
2161 }
2162 
2163 // Emit mfence instruction
// Byte sequence: 0F AE F0. The SSE2 capability assert is wrapped in NOT_LP64.
2164 void Assembler::mfence() {
2165   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2166   emit_int8(0x0F);
2167   emit_int8((unsigned char)0xAE);
2168   emit_int8((unsigned char)0xF0);
2169 }
2170 
// Register-to-register move at the build's word width: expands to
// movq(dst, src) under LP64_ONLY and to movl(dst, src) under NOT_LP64.
2171 void Assembler::mov(Register dst, Register src) {
2172   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2173 }
2174 
// MOVAPD dst, src (register form): 66 SIMD prefix, 0F opcode map, opcode 28,
// register-direct ModRM. The vector length is 512-bit when
// VM_Version::supports_avx512novl() is true and 128-bit otherwise.
// rex_w is requested only when EVEX is supported, and
// set_rex_vex_w_reverted() is applied.
2175 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2176   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2177   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2178   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2179   attributes.set_rex_vex_w_reverted();
2180   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2181   emit_int8(0x28);
2182   emit_int8((unsigned char)(0xC0 | encode));
2183 }
2184 
// MOVAPS dst, src (register form): no SIMD prefix, 0F opcode map, opcode 28,
// register-direct ModRM. The vector length is 512-bit when
// VM_Version::supports_avx512novl() is true and 128-bit otherwise.
2185 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2186   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2187   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2188   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2189   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2190   emit_int8(0x28);
2191   emit_int8((unsigned char)(0xC0 | encode));
2192 }
2193 
// MOVLHPS dst, src (register form): no SIMD prefix, 0F opcode map, opcode 16,
// register-direct ModRM. Unlike the neighbouring emitters, src (not dst or
// xnoreg) is passed as the nds argument of the prefix helper.
2194 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2195   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2196   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2197   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2198   emit_int8(0x16);
2199   emit_int8((unsigned char)(0xC0 | encode));
2200 }
2201 
// Byte load dst <- [src]: prefix, opcode 8A, then the operand bytes.
// The non-LP64 assert requires dst to have a byte register.
// NOTE(review): the third prefix() argument (true) presumably marks a
// byte-register instruction - confirm against the prefix() overload.
2202 void Assembler::movb(Register dst, Address src) {
2203   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2204   InstructionMark im(this);
2205   prefix(src, dst, true);
2206   emit_int8((unsigned char)0x8A);
2207   emit_operand(dst, src);
2208 }
2209 
// MOVDDUP dst, src (register form): F2 SIMD prefix, 0F opcode map, opcode 12,
// register-direct ModRM. The vector length is 512-bit when
// VM_Version::supports_avx512novl() is true and 128-bit otherwise; uses_vl is
// set. rex_w is requested only when EVEX is supported, and
// set_rex_vex_w_reverted() is applied.
2210 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2211   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2212   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2213   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2214   attributes.set_rex_vex_w_reverted();
2215   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2216   emit_int8(0x12);
2217   emit_int8(0xC0 | encode);
2218 }
2219 
// KMOVB k, r32 (general register into opmask register): VEX-encoded in legacy
// mode with W=0 and a 66 SIMD prefix, 0F map, opcode 92, register-direct
// ModRM. Requires AVX512DQ.
2220 void Assembler::kmovbl(KRegister dst, Register src) {
2221   assert(VM_Version::supports_avx512dq(), "");
2222   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2223   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2224   emit_int8((unsigned char)0x92);
2225   emit_int8((unsigned char)(0xC0 | encode));
2226 }
2227 
// KMOVB r32, k (opmask register into general register): VEX-encoded in legacy
// mode with W=0 and a 66 SIMD prefix, 0F map, opcode 93, register-direct
// ModRM. Requires AVX512DQ.
2228 void Assembler::kmovbl(Register dst, KRegister src) {
2229   assert(VM_Version::supports_avx512dq(), "");
2230   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2231   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2232   emit_int8((unsigned char)0x93);
2233   emit_int8((unsigned char)(0xC0 | encode));
2234 }
2235 
// KMOVW k, r32 (general register into opmask register): VEX-encoded in legacy
// mode with W=0 and no SIMD prefix, 0F map, opcode 92, register-direct ModRM.
// Requires EVEX support.
2236 void Assembler::kmovwl(KRegister dst, Register src) {
2237   assert(VM_Version::supports_evex(), "");
2238   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2239   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2240   emit_int8((unsigned char)0x92);
2241   emit_int8((unsigned char)(0xC0 | encode));
2242 }
2243 
// KMOVW r32, k (opmask register into general register): VEX-encoded in legacy
// mode with W=0 and no SIMD prefix, 0F map, opcode 93, register-direct ModRM.
// Requires EVEX support.
2244 void Assembler::kmovwl(Register dst, KRegister src) {
2245   assert(VM_Version::supports_evex(), "");
2246   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2247   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2248   emit_int8((unsigned char)0x93);
2249   emit_int8((unsigned char)(0xC0 | encode));
2250 }
2251 
// KMOVW k, [src] (load opmask register from memory): VEX-encoded in legacy
// mode with W=0 and no SIMD prefix, 0F map, opcode 90, then the operand bytes.
// The opmask register is cast to Register so that it can be passed to
// emit_operand, which places it in the ModRM reg field.
2252 void Assembler::kmovwl(KRegister dst, Address src) {
2253   assert(VM_Version::supports_evex(), "");
2254   InstructionMark im(this);
2255   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2256   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2257   emit_int8((unsigned char)0x90);
2258   emit_operand((Register)dst, src);
2259 }
2260 
// KMOVD k, r32 (general register into opmask register): VEX-encoded in legacy
// mode with W=0 and an F2 SIMD prefix, 0F map, opcode 92, register-direct
// ModRM. Requires AVX512BW.
2261 void Assembler::kmovdl(KRegister dst, Register src) {
2262   assert(VM_Version::supports_avx512bw(), "");
2263   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2264   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2265   emit_int8((unsigned char)0x92);
2266   emit_int8((unsigned char)(0xC0 | encode));
2267 }
2268 
// KMOVD r32, k (opmask register into general register): VEX-encoded in legacy
// mode with W=0 and an F2 SIMD prefix, 0F map, opcode 93, register-direct
// ModRM. Requires AVX512BW.
2269 void Assembler::kmovdl(Register dst, KRegister src) {
2270   assert(VM_Version::supports_avx512bw(), "");
2271   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2272   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2273   emit_int8((unsigned char)0x93);
2274   emit_int8((unsigned char)(0xC0 | encode));
2275 }
2276 
// KMOVQ k, k (opmask register to opmask register): VEX-encoded in legacy mode
// with W=1 and no SIMD prefix, 0F map, opcode 90, register-direct ModRM.
// Requires AVX512BW.
2277 void Assembler::kmovql(KRegister dst, KRegister src) {
2278   assert(VM_Version::supports_avx512bw(), "");
2279   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2280   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2281   emit_int8((unsigned char)0x90);
2282   emit_int8((unsigned char)(0xC0 | encode));
2283 }
2284 
// KMOVQ k, [src] (load opmask register from memory): VEX-encoded in legacy
// mode with W=1 and no SIMD prefix, 0F map, opcode 90, then the operand bytes.
// The opmask register is cast to Register so that it can be passed to
// emit_operand, which places it in the ModRM reg field.
2285 void Assembler::kmovql(KRegister dst, Address src) {
2286   assert(VM_Version::supports_avx512bw(), "");
2287   InstructionMark im(this);
2288   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2289   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2290   emit_int8((unsigned char)0x90);
2291   emit_operand((Register)dst, src);
2292 }
2293 
// KMOVQ [dst], k (store opmask register to memory): VEX-encoded in legacy mode
// with W=1 and no SIMD prefix, 0F map, opcode 91, then the operand bytes.
//   dst - memory destination (ModRM r/m)
//   src - opmask source; cast to Register so that emit_operand can place it in
//         the ModRM reg field
// The store form must use opcode 0x91. Opcode 0x90 is the load form emitted by
// kmovql(KRegister, Address), so emitting it here would decode as a load into
// src rather than a store to dst.
2294 void Assembler::kmovql(Address dst, KRegister src) {
2295   assert(VM_Version::supports_avx512bw(), "");
2296   InstructionMark im(this);
2297   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2298   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2299   emit_int8((unsigned char)0x91);
2300   emit_operand((Register)src, dst);
2301 }
2302 
// KMOVQ k, r64 (general register into opmask register): VEX-encoded in legacy
// mode with W=1 and an F2 SIMD prefix, 0F map, opcode 92, register-direct
// ModRM. Requires AVX512BW.
2303 void Assembler::kmovql(KRegister dst, Register src) {
2304   assert(VM_Version::supports_avx512bw(), "");
2305   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2306   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2307   emit_int8((unsigned char)0x92);
2308   emit_int8((unsigned char)(0xC0 | encode));
2309 }
2310 
// KMOVQ r64, k (opmask register into general register): VEX-encoded in legacy
// mode with W=1 and an F2 SIMD prefix, 0F map, opcode 93, register-direct
// ModRM. Requires AVX512BW.
2311 void Assembler::kmovql(Register dst, KRegister src) {
2312   assert(VM_Version::supports_avx512bw(), "");
2313   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2314   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2315   emit_int8((unsigned char)0x93);
2316   emit_int8((unsigned char)(0xC0 | encode));
2317 }
2318 
// KNOTW dst, src (opmask registers): VEX-encoded in legacy mode with W=0 and
// no SIMD prefix, 0F map, opcode 44, register-direct ModRM.
// Requires EVEX support.
2319 void Assembler::knotwl(KRegister dst, KRegister src) {
2320   assert(VM_Version::supports_evex(), "");
2321   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2322   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2323   emit_int8((unsigned char)0x44);
2324   emit_int8((unsigned char)(0xC0 | encode));
2325 }
2326 
2327 // This instruction produces ZF or CF flags
// KORTESTB src1, src2: VEX-encoded in legacy mode with W=0 and a 66 SIMD
// prefix, 0F map, opcode 98, register-direct ModRM. Requires AVX512DQ.
2328 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2329   assert(VM_Version::supports_avx512dq(), "");
2330   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2331   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2332   emit_int8((unsigned char)0x98);
2333   emit_int8((unsigned char)(0xC0 | encode));
2334 }
2335 
2336 // This instruction produces ZF or CF flags
// KORTESTW src1, src2: VEX-encoded in legacy mode with W=0 and no SIMD prefix,
// 0F map, opcode 98, register-direct ModRM. Requires EVEX support.
2337 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2338   assert(VM_Version::supports_evex(), "");
2339   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2340   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2341   emit_int8((unsigned char)0x98);
2342   emit_int8((unsigned char)(0xC0 | encode));
2343 }
2344 
2345 // This instruction produces ZF or CF flags
// KORTESTD src1, src2: VEX-encoded in legacy mode with W=1 and a 66 SIMD
// prefix, 0F map, opcode 98, register-direct ModRM. Requires AVX512BW.
2346 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2347   assert(VM_Version::supports_avx512bw(), "");
2348   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2349   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2350   emit_int8((unsigned char)0x98);
2351   emit_int8((unsigned char)(0xC0 | encode));
2352 }
2353 
2354 // This instruction produces ZF or CF flags
// KORTESTQ src1, src2: VEX-encoded in legacy mode with W=1 and no SIMD prefix,
// 0F map, opcode 98, register-direct ModRM. Requires AVX512BW.
2355 void Assembler::kortestql(KRegister src1, KRegister src2) {
2356   assert(VM_Version::supports_avx512bw(), "");
2357   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2358   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2359   emit_int8((unsigned char)0x98);
2360   emit_int8((unsigned char)(0xC0 | encode));
2361 }
2362 
2363 // This instruction produces ZF or CF flags
// KTESTQ src1, src2: VEX-encoded in legacy mode with W=1 and no SIMD prefix,
// 0F map, opcode 99, register-direct ModRM. Requires AVX512BW.
// The body is identical to ktestq(KRegister, KRegister).
2364 void Assembler::ktestql(KRegister src1, KRegister src2) {
2365   assert(VM_Version::supports_avx512bw(), "");
2366   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2367   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2368   emit_int8((unsigned char)0x99);
2369   emit_int8((unsigned char)(0xC0 | encode));
2370 }
2371 
// KTESTQ src1, src2: VEX-encoded in legacy mode with W=1 and no SIMD prefix,
// 0F map, opcode 99, register-direct ModRM. Requires AVX512BW.
// The body is identical to ktestql(KRegister, KRegister).
2372 void Assembler::ktestq(KRegister src1, KRegister src2) {
2373   assert(VM_Version::supports_avx512bw(), "");
2374   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2375   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2376   emit_int8((unsigned char)0x99);
2377   emit_int8((unsigned char)(0xC0 | encode));
2378 }
2379 
// KTESTD src1, src2: VEX-encoded in legacy mode with W=1 and a 66 SIMD prefix,
// 0F map, opcode 99, register-direct ModRM. Requires AVX512BW.
2380 void Assembler::ktestd(KRegister src1, KRegister src2) {
2381   assert(VM_Version::supports_avx512bw(), "");
2382   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2383   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2384   emit_int8((unsigned char)0x99);
2385   emit_int8((unsigned char)(0xC0 | encode));
2386 }
2387 
// Byte store of an immediate: address prefix, opcode C6, the operand bytes,
// then the immediate byte. rax is not an operand; it is passed to emit_operand
// only so that its register number lands in the ModRM reg field.
// NOTE(review): the trailing 1 passed to emit_operand presumably accounts for
// the one immediate byte that follows the operand - confirm against
// emit_operand().
2388 void Assembler::movb(Address dst, int imm8) {
2389   InstructionMark im(this);
2390    prefix(dst);
2391   emit_int8((unsigned char)0xC6);
2392   emit_operand(rax, dst, 1);
2393   emit_int8(imm8);
2394 }
2395 
2396 
// Byte store [dst] <- src: prefix, opcode 88, then the operand bytes.
// src is asserted to have a byte register on all builds (the load form only
// checks this under NOT_LP64).
// NOTE(review): the third prefix() argument (true) presumably marks a
// byte-register instruction - confirm against the prefix() overload.
2397 void Assembler::movb(Address dst, Register src) {
2398   assert(src->has_byte_register(), "must have byte register");
2399   InstructionMark im(this);
2400   prefix(dst, src, true);
2401   emit_int8((unsigned char)0x88);
2402   emit_operand(src, dst);
2403 }
2404 
2405 void Assembler::movdl(XMMRegister dst, Register src) {
2406   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2407   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2408   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2409   emit_int8(0x6E);
2410   emit_int8((unsigned char)(0xC0 | encode));
2411 }
2412 
2413 void Assembler::movdl(Register dst, XMMRegister src) {
2414   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2415   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2416   // swap src/dst to get correct prefix
2417   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2418   emit_int8(0x7E);
2419   emit_int8((unsigned char)(0xC0 | encode));
2420 }
2421 
2422 void Assembler::movdl(XMMRegister dst, Address src) {
2423   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2424   InstructionMark im(this);
2425   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2426   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2427   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2428   emit_int8(0x6E);
2429   emit_operand(dst, src);
2430 }
2431 
2432 void Assembler::movdl(Address dst, XMMRegister src) {
2433   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2434   InstructionMark im(this);
2435   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2436   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2437   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2438   emit_int8(0x7E);
2439   emit_operand(src, dst);
2440 }
2441 
2442 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2443   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2444   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2445   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2446   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2447   emit_int8(0x6F);
2448   emit_int8((unsigned char)(0xC0 | encode));
2449 }
2450 
2451 void Assembler::movdqa(XMMRegister dst, Address src) {
2452   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2453   InstructionMark im(this);
2454   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2455   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2456   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2457   emit_int8(0x6F);
2458   emit_operand(dst, src);
2459 }
2460 
2461 void Assembler::movdqu(XMMRegister dst, Address src) {
2462   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2463   InstructionMark im(this);
2464   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2465   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2466   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2467   emit_int8(0x6F);
2468   emit_operand(dst, src);
2469 }
2470 
2471 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2472   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2473   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2474   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2475   emit_int8(0x6F);
2476   emit_int8((unsigned char)(0xC0 | encode));
2477 }
2478 
2479 void Assembler::movdqu(Address dst, XMMRegister src) {
2480   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2481   InstructionMark im(this);
2482   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2483   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2484   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2485   emit_int8(0x7F);
2486   emit_operand(src, dst);
2487 }
2488 
2489 // Move Unaligned 256bit Vector
2490 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2491   assert(UseAVX > 0, "");
2492   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2493   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2494   emit_int8(0x6F);
2495   emit_int8((unsigned char)(0xC0 | encode));
2496 }
2497 
2498 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2499   assert(UseAVX > 0, "");
2500   InstructionMark im(this);
2501   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2502   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2503   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2504   emit_int8(0x6F);
2505   emit_operand(dst, src);
2506 }
2507 
2508 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2509   assert(UseAVX > 0, "");
2510   InstructionMark im(this);
2511   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2512   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2513   // swap src<->dst for encoding
2514   assert(src != xnoreg, "sanity");
2515   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2516   emit_int8(0x7F);
2517   emit_operand(src, dst);
2518 }
2519 
2520 // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
2521 void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
2522   assert(VM_Version::supports_evex(), "");
2523   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2524   attributes.set_is_evex_instruction();
2525   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2526   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2527   emit_int8(0x6F);
2528   emit_int8((unsigned char)(0xC0 | encode));
2529 }
2530 
2531 void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
2532   assert(VM_Version::supports_evex(), "");
2533   InstructionMark im(this);
2534   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2535   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2536   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2537   attributes.set_is_evex_instruction();
2538   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2539   emit_int8(0x6F);
2540   emit_operand(dst, src);
2541 }
2542 
2543 void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
2544   assert(VM_Version::supports_evex(), "");
2545   assert(src != xnoreg, "sanity");
2546   InstructionMark im(this);
2547   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2548   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2549   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2550   attributes.set_is_evex_instruction();
2551   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2552   emit_int8(0x7F);
2553   emit_operand(src, dst);
2554 }
2555 
2556 void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2557   assert(VM_Version::supports_avx512vlbw(), "");
2558   assert(is_vector_masking(), "");    // For stub code use only
2559   InstructionMark im(this);
2560   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2561   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2562   attributes.set_embedded_opmask_register_specifier(mask);
2563   attributes.set_is_evex_instruction();
2564   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2565   emit_int8(0x6F);
2566   emit_operand(dst, src);
2567 }
2568 
2569 void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
2570   assert(VM_Version::supports_evex(), "");
2571   InstructionMark im(this);
2572   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2573   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2574   attributes.set_is_evex_instruction();
2575   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2576   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2577   emit_int8(0x6F);
2578   emit_operand(dst, src);
2579 }
2580 
2581 void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2582   assert(is_vector_masking(), "");
2583   assert(VM_Version::supports_avx512vlbw(), "");
2584   InstructionMark im(this);
2585   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2586   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2587   attributes.set_embedded_opmask_register_specifier(mask);
2588   attributes.set_is_evex_instruction();
2589   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2590   emit_int8(0x6F);
2591   emit_operand(dst, src);
2592 }
2593 
2594 void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
2595   assert(VM_Version::supports_evex(), "");
2596   assert(src != xnoreg, "sanity");
2597   InstructionMark im(this);
2598   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2599   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2600   attributes.set_is_evex_instruction();
2601   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2602   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2603   emit_int8(0x7F);
2604   emit_operand(src, dst);
2605 }
2606 
2607 void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
2608   assert(VM_Version::supports_avx512vlbw(), "");
2609   assert(src != xnoreg, "sanity");
2610   InstructionMark im(this);
2611   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2612   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2613   attributes.set_embedded_opmask_register_specifier(mask);
2614   attributes.set_is_evex_instruction();
2615   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2616   emit_int8(0x7F);
2617   emit_operand(src, dst);
2618 }
2619 
2620 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2621   assert(VM_Version::supports_evex(), "");
2622   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2623   attributes.set_is_evex_instruction();
2624   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2625   emit_int8(0x6F);
2626   emit_int8((unsigned char)(0xC0 | encode));
2627 }
2628 
2629 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2630   assert(VM_Version::supports_evex(), "");
2631   InstructionMark im(this);
2632   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
2633   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2634   attributes.set_is_evex_instruction();
2635   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2636   emit_int8(0x6F);
2637   emit_operand(dst, src);
2638 }
2639 
2640 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2641   assert(VM_Version::supports_evex(), "");
2642   assert(src != xnoreg, "sanity");
2643   InstructionMark im(this);
2644   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2645   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2646   attributes.set_is_evex_instruction();
2647   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2648   emit_int8(0x7F);
2649   emit_operand(src, dst);
2650 }
2651 
2652 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2653   assert(VM_Version::supports_evex(), "");
2654   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2655   attributes.set_is_evex_instruction();
2656   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2657   emit_int8(0x6F);
2658   emit_int8((unsigned char)(0xC0 | encode));
2659 }
2660 
2661 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2662   assert(VM_Version::supports_evex(), "");
2663   InstructionMark im(this);
2664   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2665   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2666   attributes.set_is_evex_instruction();
2667   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2668   emit_int8(0x6F);
2669   emit_operand(dst, src);
2670 }
2671 
2672 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2673   assert(VM_Version::supports_evex(), "");
2674   assert(src != xnoreg, "sanity");
2675   InstructionMark im(this);
2676   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2677   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2678   attributes.set_is_evex_instruction();
2679   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2680   emit_int8(0x7F);
2681   emit_operand(src, dst);
2682 }
2683 
2684 // Uses zero extension on 64bit
2685 
2686 void Assembler::movl(Register dst, int32_t imm32) {
2687   int encode = prefix_and_encode(dst->encoding());
2688   emit_int8((unsigned char)(0xB8 | encode));
2689   emit_int32(imm32);
2690 }
2691 
2692 void Assembler::movl(Register dst, Register src) {
2693   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2694   emit_int8((unsigned char)0x8B);
2695   emit_int8((unsigned char)(0xC0 | encode));
2696 }
2697 
2698 void Assembler::movl(Register dst, Address src) {
2699   InstructionMark im(this);
2700   prefix(src, dst);
2701   emit_int8((unsigned char)0x8B);
2702   emit_operand(dst, src);
2703 }
2704 
2705 void Assembler::movl(Address dst, int32_t imm32) {
2706   InstructionMark im(this);
2707   prefix(dst);
2708   emit_int8((unsigned char)0xC7);
2709   emit_operand(rax, dst, 4);
2710   emit_int32(imm32);
2711 }
2712 
2713 void Assembler::movl(Address dst, Register src) {
2714   InstructionMark im(this);
2715   prefix(dst, src);
2716   emit_int8((unsigned char)0x89);
2717   emit_operand(src, dst);
2718 }
2719 
2720 // New cpus require to use movsd and movss to avoid partial register stall
2721 // when loading from memory. But for old Opteron use movlpd instead of movsd.
2722 // The selection is done in MacroAssembler::movdbl() and movflt().
2723 void Assembler::movlpd(XMMRegister dst, Address src) {
2724   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2725   InstructionMark im(this);
2726   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2727   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2728   attributes.set_rex_vex_w_reverted();
2729   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2730   emit_int8(0x12);
2731   emit_operand(dst, src);
2732 }
2733 
2734 void Assembler::movq( MMXRegister dst, Address src ) {
2735   assert( VM_Version::supports_mmx(), "" );
2736   emit_int8(0x0F);
2737   emit_int8(0x6F);
2738   emit_operand(dst, src);
2739 }
2740 
2741 void Assembler::movq( Address dst, MMXRegister src ) {
2742   assert( VM_Version::supports_mmx(), "" );
2743   emit_int8(0x0F);
2744   emit_int8(0x7F);
2745   // workaround gcc (3.2.1-7a) bug
2746   // In that version of gcc with only an emit_operand(MMX, Address)
2747   // gcc will tail jump and try and reverse the parameters completely
2748   // obliterating dst in the process. By having a version available
2749   // that doesn't need to swap the args at the tail jump the bug is
2750   // avoided.
2751   emit_operand(dst, src);
2752 }
2753 
2754 void Assembler::movq(XMMRegister dst, Address src) {
2755   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2756   InstructionMark im(this);
2757   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2758   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2759   attributes.set_rex_vex_w_reverted();
2760   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2761   emit_int8(0x7E);
2762   emit_operand(dst, src);
2763 }
2764 
2765 void Assembler::movq(Address dst, XMMRegister src) {
2766   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2767   InstructionMark im(this);
2768   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2769   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2770   attributes.set_rex_vex_w_reverted();
2771   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2772   emit_int8((unsigned char)0xD6);
2773   emit_operand(src, dst);
2774 }
2775 
2776 void Assembler::movsbl(Register dst, Address src) { // movsxb
2777   InstructionMark im(this);
2778   prefix(src, dst);
2779   emit_int8(0x0F);
2780   emit_int8((unsigned char)0xBE);
2781   emit_operand(dst, src);
2782 }
2783 
2784 void Assembler::movsbl(Register dst, Register src) { // movsxb
2785   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2786   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2787   emit_int8(0x0F);
2788   emit_int8((unsigned char)0xBE);
2789   emit_int8((unsigned char)(0xC0 | encode));
2790 }
2791 
2792 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2793   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2794   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2795   attributes.set_rex_vex_w_reverted();
2796   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2797   emit_int8(0x10);
2798   emit_int8((unsigned char)(0xC0 | encode));
2799 }
2800 
2801 void Assembler::movsd(XMMRegister dst, Address src) {
2802   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2803   InstructionMark im(this);
2804   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2805   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2806   attributes.set_rex_vex_w_reverted();
2807   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2808   emit_int8(0x10);
2809   emit_operand(dst, src);
2810 }
2811 
2812 void Assembler::movsd(Address dst, XMMRegister src) {
2813   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2814   InstructionMark im(this);
2815   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2816   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2817   attributes.set_rex_vex_w_reverted();
2818   simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2819   emit_int8(0x11);
2820   emit_operand(src, dst);
2821 }
2822 
2823 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2824   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2825   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2826   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2827   emit_int8(0x10);
2828   emit_int8((unsigned char)(0xC0 | encode));
2829 }
2830 
2831 void Assembler::movss(XMMRegister dst, Address src) {
2832   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2833   InstructionMark im(this);
2834   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2835   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2836   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2837   emit_int8(0x10);
2838   emit_operand(dst, src);
2839 }
2840 
2841 void Assembler::movss(Address dst, XMMRegister src) {
2842   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2843   InstructionMark im(this);
2844   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2845   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2846   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2847   emit_int8(0x11);
2848   emit_operand(src, dst);
2849 }
2850 
2851 void Assembler::movswl(Register dst, Address src) { // movsxw
2852   InstructionMark im(this);
2853   prefix(src, dst);
2854   emit_int8(0x0F);
2855   emit_int8((unsigned char)0xBF);
2856   emit_operand(dst, src);
2857 }
2858 
2859 void Assembler::movswl(Register dst, Register src) { // movsxw
2860   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2861   emit_int8(0x0F);
2862   emit_int8((unsigned char)0xBF);
2863   emit_int8((unsigned char)(0xC0 | encode));
2864 }
2865 
2866 void Assembler::movw(Address dst, int imm16) {
2867   InstructionMark im(this);
2868 
2869   emit_int8(0x66); // switch to 16-bit mode
2870   prefix(dst);
2871   emit_int8((unsigned char)0xC7);
2872   emit_operand(rax, dst, 2);
2873   emit_int16(imm16);
2874 }
2875 
2876 void Assembler::movw(Register dst, Address src) {
2877   InstructionMark im(this);
2878   emit_int8(0x66);
2879   prefix(src, dst);
2880   emit_int8((unsigned char)0x8B);
2881   emit_operand(dst, src);
2882 }
2883 
2884 void Assembler::movw(Address dst, Register src) {
2885   InstructionMark im(this);
2886   emit_int8(0x66);
2887   prefix(dst, src);
2888   emit_int8((unsigned char)0x89);
2889   emit_operand(src, dst);
2890 }
2891 
2892 void Assembler::movzbl(Register dst, Address src) { // movzxb
2893   InstructionMark im(this);
2894   prefix(src, dst);
2895   emit_int8(0x0F);
2896   emit_int8((unsigned char)0xB6);
2897   emit_operand(dst, src);
2898 }
2899 
2900 void Assembler::movzbl(Register dst, Register src) { // movzxb
2901   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2902   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2903   emit_int8(0x0F);
2904   emit_int8((unsigned char)0xB6);
2905   emit_int8(0xC0 | encode);
2906 }
2907 
2908 void Assembler::movzwl(Register dst, Address src) { // movzxw
2909   InstructionMark im(this);
2910   prefix(src, dst);
2911   emit_int8(0x0F);
2912   emit_int8((unsigned char)0xB7);
2913   emit_operand(dst, src);
2914 }
2915 
2916 void Assembler::movzwl(Register dst, Register src) { // movzxw
2917   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2918   emit_int8(0x0F);
2919   emit_int8((unsigned char)0xB7);
2920   emit_int8(0xC0 | encode);
2921 }
2922 
2923 void Assembler::mull(Address src) {
2924   InstructionMark im(this);
2925   prefix(src);
2926   emit_int8((unsigned char)0xF7);
2927   emit_operand(rsp, src);
2928 }
2929 
2930 void Assembler::mull(Register src) {
2931   int encode = prefix_and_encode(src->encoding());
2932   emit_int8((unsigned char)0xF7);
2933   emit_int8((unsigned char)(0xE0 | encode));
2934 }
2935 
2936 void Assembler::mulsd(XMMRegister dst, Address src) {
2937   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2938   InstructionMark im(this);
2939   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2940   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2941   attributes.set_rex_vex_w_reverted();
2942   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2943   emit_int8(0x59);
2944   emit_operand(dst, src);
2945 }
2946 
2947 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2948   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2949   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2950   attributes.set_rex_vex_w_reverted();
2951   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2952   emit_int8(0x59);
2953   emit_int8((unsigned char)(0xC0 | encode));
2954 }
2955 
2956 void Assembler::mulss(XMMRegister dst, Address src) {
2957   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2958   InstructionMark im(this);
2959   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2960   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2961   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2962   emit_int8(0x59);
2963   emit_operand(dst, src);
2964 }
2965 
2966 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2967   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2968   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2969   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2970   emit_int8(0x59);
2971   emit_int8((unsigned char)(0xC0 | encode));
2972 }
2973 
2974 void Assembler::negl(Register dst) {
2975   int encode = prefix_and_encode(dst->encoding());
2976   emit_int8((unsigned char)0xF7);
2977   emit_int8((unsigned char)(0xD8 | encode));
2978 }
2979 
2980 void Assembler::nop(int i) {
2981 #ifdef ASSERT
2982   assert(i > 0, " ");
2983   // The fancy nops aren't currently recognized by debuggers making it a
2984   // pain to disassemble code while debugging. If asserts are on clearly
2985   // speed is not an issue so simply use the single byte traditional nop
2986   // to do alignment.
2987 
2988   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2989   return;
2990 
2991 #endif // ASSERT
2992 
2993   if (UseAddressNop && VM_Version::is_intel()) {
2994     //
2995     // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
2996     //  1: 0x90
2997     //  2: 0x66 0x90
2998     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2999     //  4: 0x0F 0x1F 0x40 0x00
3000     //  5: 0x0F 0x1F 0x44 0x00 0x00
3001     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3002     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3003     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3004     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3005     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3006     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3007 
3008     // The rest coding is Intel specific - don't use consecutive address nops
3009 
3010     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3011     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3012     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3013     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3014 
3015     while(i >= 15) {
3016       // For Intel don't generate consecutive addess nops (mix with regular nops)
3017       i -= 15;
3018       emit_int8(0x66);   // size prefix
3019       emit_int8(0x66);   // size prefix
3020       emit_int8(0x66);   // size prefix
3021       addr_nop_8();
3022       emit_int8(0x66);   // size prefix
3023       emit_int8(0x66);   // size prefix
3024       emit_int8(0x66);   // size prefix
3025       emit_int8((unsigned char)0x90);
3026                          // nop
3027     }
3028     switch (i) {
3029       case 14:
3030         emit_int8(0x66); // size prefix
3031       case 13:
3032         emit_int8(0x66); // size prefix
3033       case 12:
3034         addr_nop_8();
3035         emit_int8(0x66); // size prefix
3036         emit_int8(0x66); // size prefix
3037         emit_int8(0x66); // size prefix
3038         emit_int8((unsigned char)0x90);
3039                          // nop
3040         break;
3041       case 11:
3042         emit_int8(0x66); // size prefix
3043       case 10:
3044         emit_int8(0x66); // size prefix
3045       case 9:
3046         emit_int8(0x66); // size prefix
3047       case 8:
3048         addr_nop_8();
3049         break;
3050       case 7:
3051         addr_nop_7();
3052         break;
3053       case 6:
3054         emit_int8(0x66); // size prefix
3055       case 5:
3056         addr_nop_5();
3057         break;
3058       case 4:
3059         addr_nop_4();
3060         break;
3061       case 3:
3062         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3063         emit_int8(0x66); // size prefix
3064       case 2:
3065         emit_int8(0x66); // size prefix
3066       case 1:
3067         emit_int8((unsigned char)0x90);
3068                          // nop
3069         break;
3070       default:
3071         assert(i == 0, " ");
3072     }
3073     return;
3074   }
3075   if (UseAddressNop && VM_Version::is_amd()) {
3076     //
3077     // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
3078     //  1: 0x90
3079     //  2: 0x66 0x90
3080     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3081     //  4: 0x0F 0x1F 0x40 0x00
3082     //  5: 0x0F 0x1F 0x44 0x00 0x00
3083     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3084     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3085     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3086     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3087     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3088     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3089 
3090     // The rest coding is AMD specific - use consecutive address nops
3091 
3092     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3093     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3094     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3095     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3096     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3097     //     Size prefixes (0x66) are added for larger sizes
3098 
3099     while(i >= 22) {
3100       i -= 11;
3101       emit_int8(0x66); // size prefix
3102       emit_int8(0x66); // size prefix
3103       emit_int8(0x66); // size prefix
3104       addr_nop_8();
3105     }
3106     // Generate first nop for size between 21-12
3107     switch (i) {
3108       case 21:
3109         i -= 1;
3110         emit_int8(0x66); // size prefix
3111       case 20:
3112       case 19:
3113         i -= 1;
3114         emit_int8(0x66); // size prefix
3115       case 18:
3116       case 17:
3117         i -= 1;
3118         emit_int8(0x66); // size prefix
3119       case 16:
3120       case 15:
3121         i -= 8;
3122         addr_nop_8();
3123         break;
3124       case 14:
3125       case 13:
3126         i -= 7;
3127         addr_nop_7();
3128         break;
3129       case 12:
3130         i -= 6;
3131         emit_int8(0x66); // size prefix
3132         addr_nop_5();
3133         break;
3134       default:
3135         assert(i < 12, " ");
3136     }
3137 
3138     // Generate second nop for size between 11-1
3139     switch (i) {
3140       case 11:
3141         emit_int8(0x66); // size prefix
3142       case 10:
3143         emit_int8(0x66); // size prefix
3144       case 9:
3145         emit_int8(0x66); // size prefix
3146       case 8:
3147         addr_nop_8();
3148         break;
3149       case 7:
3150         addr_nop_7();
3151         break;
3152       case 6:
3153         emit_int8(0x66); // size prefix
3154       case 5:
3155         addr_nop_5();
3156         break;
3157       case 4:
3158         addr_nop_4();
3159         break;
3160       case 3:
3161         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3162         emit_int8(0x66); // size prefix
3163       case 2:
3164         emit_int8(0x66); // size prefix
3165       case 1:
3166         emit_int8((unsigned char)0x90);
3167                          // nop
3168         break;
3169       default:
3170         assert(i == 0, " ");
3171     }
3172     return;
3173   }
3174 
3175   // Using nops with size prefixes "0x66 0x90".
3176   // From AMD Optimization Guide:
3177   //  1: 0x90
3178   //  2: 0x66 0x90
3179   //  3: 0x66 0x66 0x90
3180   //  4: 0x66 0x66 0x66 0x90
3181   //  5: 0x66 0x66 0x90 0x66 0x90
3182   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3183   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3184   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3185   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3186   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3187   //
3188   while(i > 12) {
3189     i -= 4;
3190     emit_int8(0x66); // size prefix
3191     emit_int8(0x66);
3192     emit_int8(0x66);
3193     emit_int8((unsigned char)0x90);
3194                      // nop
3195   }
3196   // 1 - 12 nops
3197   if(i > 8) {
3198     if(i > 9) {
3199       i -= 1;
3200       emit_int8(0x66);
3201     }
3202     i -= 3;
3203     emit_int8(0x66);
3204     emit_int8(0x66);
3205     emit_int8((unsigned char)0x90);
3206   }
3207   // 1 - 8 nops
3208   if(i > 4) {
3209     if(i > 6) {
3210       i -= 1;
3211       emit_int8(0x66);
3212     }
3213     i -= 3;
3214     emit_int8(0x66);
3215     emit_int8(0x66);
3216     emit_int8((unsigned char)0x90);
3217   }
3218   switch (i) {
3219     case 4:
3220       emit_int8(0x66);
3221     case 3:
3222       emit_int8(0x66);
3223     case 2:
3224       emit_int8(0x66);
3225     case 1:
3226       emit_int8((unsigned char)0x90);
3227       break;
3228     default:
3229       assert(i == 0, " ");
3230   }
3231 }
3232 
3233 void Assembler::notl(Register dst) {
3234   int encode = prefix_and_encode(dst->encoding());
3235   emit_int8((unsigned char)0xF7);
3236   emit_int8((unsigned char)(0xD0 | encode));
3237 }
3238 
3239 void Assembler::orl(Address dst, int32_t imm32) {
3240   InstructionMark im(this);
3241   prefix(dst);
3242   emit_arith_operand(0x81, rcx, dst, imm32);
3243 }
3244 
3245 void Assembler::orl(Register dst, int32_t imm32) {
3246   prefix(dst);
3247   emit_arith(0x81, 0xC8, dst, imm32);
3248 }
3249 
3250 void Assembler::orl(Register dst, Address src) {
3251   InstructionMark im(this);
3252   prefix(src, dst);
3253   emit_int8(0x0B);
3254   emit_operand(dst, src);
3255 }
3256 
3257 void Assembler::orl(Register dst, Register src) {
3258   (void) prefix_and_encode(dst->encoding(), src->encoding());
3259   emit_arith(0x0B, 0xC0, dst, src);
3260 }
3261 
3262 void Assembler::orl(Address dst, Register src) {
3263   InstructionMark im(this);
3264   prefix(dst, src);
3265   emit_int8(0x09);
3266   emit_operand(src, dst);
3267 }
3268 
3269 void Assembler::packuswb(XMMRegister dst, Address src) {
3270   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3271   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3272   InstructionMark im(this);
3273   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3274   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3275   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3276   emit_int8(0x67);
3277   emit_operand(dst, src);
3278 }
3279 
3280 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
3281   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3282   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3283   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3284   emit_int8(0x67);
3285   emit_int8((unsigned char)(0xC0 | encode));
3286 }
3287 
3288 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3289   assert(UseAVX > 0, "some form of AVX must be enabled");
3290   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3291   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3292   emit_int8(0x67);
3293   emit_int8((unsigned char)(0xC0 | encode));
3294 }
3295 
3296 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3297   assert(VM_Version::supports_avx2(), "");
3298   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3299   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3300   emit_int8(0x00);
3301   emit_int8(0xC0 | encode);
3302   emit_int8(imm8);
3303 }
3304 
3305 void Assembler::vperm2i128(XMMRegister dst,  XMMRegister nds, XMMRegister src, int imm8) {
3306   assert(VM_Version::supports_avx2(), "");
3307   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3308   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3309   emit_int8(0x46);
3310   emit_int8(0xC0 | encode);
3311   emit_int8(imm8);
3312 }
3313 
3314 
3315 void Assembler::pause() {
3316   emit_int8((unsigned char)0xF3);
3317   emit_int8((unsigned char)0x90);
3318 }
3319 
3320 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3321   assert(VM_Version::supports_sse4_2(), "");
3322   InstructionMark im(this);
3323   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3324   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3325   emit_int8(0x61);
3326   emit_operand(dst, src);
3327   emit_int8(imm8);
3328 }
3329 
3330 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3331   assert(VM_Version::supports_sse4_2(), "");
3332   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3333   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3334   emit_int8(0x61);
3335   emit_int8((unsigned char)(0xC0 | encode));
3336   emit_int8(imm8);
3337 }
3338 
3339 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3340 void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
3341   assert(VM_Version::supports_sse2(), "");
3342   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3343   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3344   emit_int8(0x74);
3345   emit_int8((unsigned char)(0xC0 | encode));
3346 }
3347 
3348 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3349 void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3350   assert(VM_Version::supports_avx(), "");
3351   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3352   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3353   emit_int8(0x74);
3354   emit_int8((unsigned char)(0xC0 | encode));
3355 }
3356 
3357 // In this context, kdst is written the mask used to process the equal components
3358 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3359   assert(VM_Version::supports_avx512bw(), "");
3360   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3361   attributes.set_is_evex_instruction();
3362   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3363   emit_int8(0x74);
3364   emit_int8((unsigned char)(0xC0 | encode));
3365 }
3366 
3367 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3368   assert(VM_Version::supports_avx512vlbw(), "");
3369   InstructionMark im(this);
3370   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3371   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3372   attributes.set_is_evex_instruction();
3373   int dst_enc = kdst->encoding();
3374   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3375   emit_int8(0x64);
3376   emit_operand(as_Register(dst_enc), src);
3377 }
3378 
3379 void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3380   assert(is_vector_masking(), "");
3381   assert(VM_Version::supports_avx512vlbw(), "");
3382   InstructionMark im(this);
3383   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3384   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3385   attributes.set_embedded_opmask_register_specifier(mask);
3386   attributes.set_is_evex_instruction();
3387   int dst_enc = kdst->encoding();
3388   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3389   emit_int8(0x64);
3390   emit_operand(as_Register(dst_enc), src);
3391 }
3392 
3393 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3394   assert(VM_Version::supports_avx512vlbw(), "");
3395   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3396   attributes.set_is_evex_instruction();
3397   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3398   emit_int8(0x3E);
3399   emit_int8((unsigned char)(0xC0 | encode));
3400   emit_int8(vcc);
3401 }
3402 
3403 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3404   assert(is_vector_masking(), "");
3405   assert(VM_Version::supports_avx512vlbw(), "");
3406   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3407   attributes.set_embedded_opmask_register_specifier(mask);
3408   attributes.set_is_evex_instruction();
3409   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3410   emit_int8(0x3E);
3411   emit_int8((unsigned char)(0xC0 | encode));
3412   emit_int8(vcc);
3413 }
3414 
3415 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3416   assert(VM_Version::supports_avx512vlbw(), "");
3417   InstructionMark im(this);
3418   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3419   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3420   attributes.set_is_evex_instruction();
3421   int dst_enc = kdst->encoding();
3422   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3423   emit_int8(0x3E);
3424   emit_operand(as_Register(dst_enc), src);
3425   emit_int8(vcc);
3426 }
3427 
3428 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3429   assert(VM_Version::supports_avx512bw(), "");
3430   InstructionMark im(this);
3431   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3432   attributes.set_is_evex_instruction();
3433   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3434   int dst_enc = kdst->encoding();
3435   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3436   emit_int8(0x74);
3437   emit_operand(as_Register(dst_enc), src);
3438 }
3439 
3440 void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3441   assert(VM_Version::supports_avx512vlbw(), "");
3442   assert(is_vector_masking(), "");    // For stub code use only
3443   InstructionMark im(this);
3444   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false);
3445   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3446   attributes.set_embedded_opmask_register_specifier(mask);
3447   attributes.set_is_evex_instruction();
3448   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3449   emit_int8(0x74);
3450   emit_operand(as_Register(kdst->encoding()), src);
3451 }
3452 
3453 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3454 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3455   assert(VM_Version::supports_sse2(), "");
3456   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3457   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3458   emit_int8(0x75);
3459   emit_int8((unsigned char)(0xC0 | encode));
3460 }
3461 
3462 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3463 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3464   assert(VM_Version::supports_avx(), "");
3465   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3466   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3467   emit_int8(0x75);
3468   emit_int8((unsigned char)(0xC0 | encode));
3469 }
3470 
3471 // In this context, kdst is written the mask used to process the equal components
3472 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3473   assert(VM_Version::supports_avx512bw(), "");
3474   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3475   attributes.set_is_evex_instruction();
3476   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3477   emit_int8(0x75);
3478   emit_int8((unsigned char)(0xC0 | encode));
3479 }
3480 
3481 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3482   assert(VM_Version::supports_avx512bw(), "");
3483   InstructionMark im(this);
3484   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3485   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3486   attributes.set_is_evex_instruction();
3487   int dst_enc = kdst->encoding();
3488   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3489   emit_int8(0x75);
3490   emit_operand(as_Register(dst_enc), src);
3491 }
3492 
3493 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3494 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3495   assert(VM_Version::supports_sse2(), "");
3496   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3497   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3498   emit_int8(0x76);
3499   emit_int8((unsigned char)(0xC0 | encode));
3500 }
3501 
3502 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3503 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3504   assert(VM_Version::supports_avx(), "");
3505   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3506   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3507   emit_int8(0x76);
3508   emit_int8((unsigned char)(0xC0 | encode));
3509 }
3510 
3511 // In this context, kdst is written the mask used to process the equal components
3512 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3513   assert(VM_Version::supports_evex(), "");
3514   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3515   attributes.set_is_evex_instruction();
3516   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3517   emit_int8(0x76);
3518   emit_int8((unsigned char)(0xC0 | encode));
3519 }
3520 
3521 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3522   assert(VM_Version::supports_evex(), "");
3523   InstructionMark im(this);
3524   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3525   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3526   attributes.set_is_evex_instruction();
3527   int dst_enc = kdst->encoding();
3528   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3529   emit_int8(0x76);
3530   emit_operand(as_Register(dst_enc), src);
3531 }
3532 
3533 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3534 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3535   assert(VM_Version::supports_sse4_1(), "");
3536   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3537   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3538   emit_int8(0x29);
3539   emit_int8((unsigned char)(0xC0 | encode));
3540 }
3541 
3542 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3543 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3544   assert(VM_Version::supports_avx(), "");
3545   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3546   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3547   emit_int8(0x29);
3548   emit_int8((unsigned char)(0xC0 | encode));
3549 }
3550 
3551 // In this context, kdst is written the mask used to process the equal components
3552 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3553   assert(VM_Version::supports_evex(), "");
3554   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3555   attributes.set_is_evex_instruction();
3556   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3557   emit_int8(0x29);
3558   emit_int8((unsigned char)(0xC0 | encode));
3559 }
3560 
3561 // In this context, kdst is written the mask used to process the equal components
3562 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3563   assert(VM_Version::supports_evex(), "");
3564   InstructionMark im(this);
3565   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3566   attributes.set_is_evex_instruction();
3567   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
3568   int dst_enc = kdst->encoding();
3569   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3570   emit_int8(0x29);
3571   emit_operand(as_Register(dst_enc), src);
3572 }
3573 
3574 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3575   assert(VM_Version::supports_sse2(), "");
3576   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3577   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3578   emit_int8((unsigned char)0xD7);
3579   emit_int8((unsigned char)(0xC0 | encode));
3580 }
3581 
3582 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3583   assert(VM_Version::supports_avx2(), "");
3584   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3585   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3586   emit_int8((unsigned char)0xD7);
3587   emit_int8((unsigned char)(0xC0 | encode));
3588 }
3589 
3590 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3591   assert(VM_Version::supports_sse4_1(), "");
3592   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3593   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3594   emit_int8(0x16);
3595   emit_int8((unsigned char)(0xC0 | encode));
3596   emit_int8(imm8);
3597 }
3598 
3599 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
3600   assert(VM_Version::supports_sse4_1(), "");
3601   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3602   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3603   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3604   emit_int8(0x16);
3605   emit_operand(src, dst);
3606   emit_int8(imm8);
3607 }
3608 
3609 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3610   assert(VM_Version::supports_sse4_1(), "");
3611   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3612   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3613   emit_int8(0x16);
3614   emit_int8((unsigned char)(0xC0 | encode));
3615   emit_int8(imm8);
3616 }
3617 
3618 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
3619   assert(VM_Version::supports_sse4_1(), "");
3620   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3621   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3622   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3623   emit_int8(0x16);
3624   emit_operand(src, dst);
3625   emit_int8(imm8);
3626 }
3627 
3628 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3629   assert(VM_Version::supports_sse2(), "");
3630   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3631   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3632   emit_int8((unsigned char)0xC5);
3633   emit_int8((unsigned char)(0xC0 | encode));
3634   emit_int8(imm8);
3635 }
3636 
3637 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
3638   assert(VM_Version::supports_sse4_1(), "");
3639   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3640   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3641   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3642   emit_int8((unsigned char)0x15);
3643   emit_operand(src, dst);
3644   emit_int8(imm8);
3645 }
3646 
3647 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
3648   assert(VM_Version::supports_sse4_1(), "");
3649   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3650   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3651   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3652   emit_int8(0x14);
3653   emit_operand(src, dst);
3654   emit_int8(imm8);
3655 }
3656 
3657 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3658   assert(VM_Version::supports_sse4_1(), "");
3659   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3660   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3661   emit_int8(0x22);
3662   emit_int8((unsigned char)(0xC0 | encode));
3663   emit_int8(imm8);
3664 }
3665 
3666 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
3667   assert(VM_Version::supports_sse4_1(), "");
3668   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3669   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3670   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3671   emit_int8(0x22);
3672   emit_operand(dst,src);
3673   emit_int8(imm8);
3674 }
3675 
3676 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3677   assert(VM_Version::supports_sse4_1(), "");
3678   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3679   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3680   emit_int8(0x22);
3681   emit_int8((unsigned char)(0xC0 | encode));
3682   emit_int8(imm8);
3683 }
3684 
3685 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
3686   assert(VM_Version::supports_sse4_1(), "");
3687   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3688   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3689   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3690   emit_int8(0x22);
3691   emit_operand(dst, src);
3692   emit_int8(imm8);
3693 }
3694 
3695 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3696   assert(VM_Version::supports_sse2(), "");
3697   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3698   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3699   emit_int8((unsigned char)0xC4);
3700   emit_int8((unsigned char)(0xC0 | encode));
3701   emit_int8(imm8);
3702 }
3703 
3704 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
3705   assert(VM_Version::supports_sse2(), "");
3706   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3707   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3708   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3709   emit_int8((unsigned char)0xC4);
3710   emit_operand(dst, src);
3711   emit_int8(imm8);
3712 }
3713 
3714 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
3715   assert(VM_Version::supports_sse4_1(), "");
3716   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3717   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3718   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3719   emit_int8(0x20);
3720   emit_operand(dst, src);
3721   emit_int8(imm8);
3722 }
3723 
3724 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3725   assert(VM_Version::supports_sse4_1(), "");
3726   InstructionMark im(this);
3727   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3728   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3729   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3730   emit_int8(0x30);
3731   emit_operand(dst, src);
3732 }
3733 
3734 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3735   assert(VM_Version::supports_sse4_1(), "");
3736   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3737   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3738   emit_int8(0x30);
3739   emit_int8((unsigned char)(0xC0 | encode));
3740 }
3741 
3742 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3743   assert(VM_Version::supports_avx(), "");
3744   InstructionMark im(this);
3745   assert(dst != xnoreg, "sanity");
3746   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3747   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3748   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3749   emit_int8(0x30);
3750   emit_operand(dst, src);
3751 }
3752 
3753 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
3754   assert(is_vector_masking(), "");
3755   assert(VM_Version::supports_avx512vlbw(), "");
3756   assert(dst != xnoreg, "sanity");
3757   InstructionMark im(this);
3758   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3759   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3760   attributes.set_embedded_opmask_register_specifier(mask);
3761   attributes.set_is_evex_instruction();
3762   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3763   emit_int8(0x30);
3764   emit_operand(dst, src);
3765 }
3766 
3767 void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
3768   assert(VM_Version::supports_avx512vlbw(), "");
3769   assert(src != xnoreg, "sanity");
3770   InstructionMark im(this);
3771   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3772   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3773   attributes.set_is_evex_instruction();
3774   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3775   emit_int8(0x30);
3776   emit_operand(src, dst);
3777 }
3778 
3779 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
3780   assert(is_vector_masking(), "");
3781   assert(VM_Version::supports_avx512vlbw(), "");
3782   assert(src != xnoreg, "sanity");
3783   InstructionMark im(this);
3784   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3785   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3786   attributes.set_embedded_opmask_register_specifier(mask);
3787   attributes.set_is_evex_instruction();
3788   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3789   emit_int8(0x30);
3790   emit_operand(src, dst);
3791 }
3792 
3793 // generic
3794 void Assembler::pop(Register dst) {
3795   int encode = prefix_and_encode(dst->encoding());
3796   emit_int8(0x58 | encode);
3797 }
3798 
3799 void Assembler::popcntl(Register dst, Address src) {
3800   assert(VM_Version::supports_popcnt(), "must support");
3801   InstructionMark im(this);
3802   emit_int8((unsigned char)0xF3);
3803   prefix(src, dst);
3804   emit_int8(0x0F);
3805   emit_int8((unsigned char)0xB8);
3806   emit_operand(dst, src);
3807 }
3808 
3809 void Assembler::popcntl(Register dst, Register src) {
3810   assert(VM_Version::supports_popcnt(), "must support");
3811   emit_int8((unsigned char)0xF3);
3812   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3813   emit_int8(0x0F);
3814   emit_int8((unsigned char)0xB8);
3815   emit_int8((unsigned char)(0xC0 | encode));
3816 }
3817 
3818 void Assembler::popf() {
3819   emit_int8((unsigned char)0x9D);
3820 }
3821 
3822 #ifndef _LP64 // no 32bit push/pop on amd64
3823 void Assembler::popl(Address dst) {
3824   // NOTE: this will adjust stack by 8byte on 64bits
3825   InstructionMark im(this);
3826   prefix(dst);
3827   emit_int8((unsigned char)0x8F);
3828   emit_operand(rax, dst);
3829 }
3830 #endif
3831 
3832 void Assembler::prefetch_prefix(Address src) {
3833   prefix(src);
3834   emit_int8(0x0F);
3835 }
3836 
3837 void Assembler::prefetchnta(Address src) {
3838   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3839   InstructionMark im(this);
3840   prefetch_prefix(src);
3841   emit_int8(0x18);
3842   emit_operand(rax, src); // 0, src
3843 }
3844 
3845 void Assembler::prefetchr(Address src) {
3846   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3847   InstructionMark im(this);
3848   prefetch_prefix(src);
3849   emit_int8(0x0D);
3850   emit_operand(rax, src); // 0, src
3851 }
3852 
3853 void Assembler::prefetcht0(Address src) {
3854   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3855   InstructionMark im(this);
3856   prefetch_prefix(src);
3857   emit_int8(0x18);
3858   emit_operand(rcx, src); // 1, src
3859 }
3860 
3861 void Assembler::prefetcht1(Address src) {
3862   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3863   InstructionMark im(this);
3864   prefetch_prefix(src);
3865   emit_int8(0x18);
3866   emit_operand(rdx, src); // 2, src
3867 }
3868 
3869 void Assembler::prefetcht2(Address src) {
3870   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3871   InstructionMark im(this);
3872   prefetch_prefix(src);
3873   emit_int8(0x18);
3874   emit_operand(rbx, src); // 3, src
3875 }
3876 
3877 void Assembler::prefetchw(Address src) {
3878   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3879   InstructionMark im(this);
3880   prefetch_prefix(src);
3881   emit_int8(0x0D);
3882   emit_operand(rcx, src); // 1, src
3883 }
3884 
3885 void Assembler::prefix(Prefix p) {
3886   emit_int8(p);
3887 }
3888 
3889 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
3890   assert(VM_Version::supports_ssse3(), "");
3891   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3892   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3893   emit_int8(0x00);
3894   emit_int8((unsigned char)(0xC0 | encode));
3895 }
3896 
3897 void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3898   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
3899          vector_len == AVX_256bit? VM_Version::supports_avx2() :
3900          0, "");
3901   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3902   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3903   emit_int8(0x00);
3904   emit_int8((unsigned char)(0xC0 | encode));
3905 }
3906 
3907 void Assembler::pshufb(XMMRegister dst, Address src) {
3908   assert(VM_Version::supports_ssse3(), "");
3909   InstructionMark im(this);
3910   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3911   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3912   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3913   emit_int8(0x00);
3914   emit_operand(dst, src);
3915 }
3916 
3917 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
3918   assert(isByte(mode), "invalid value");
3919   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3920   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
3921   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3922   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3923   emit_int8(0x70);
3924   emit_int8((unsigned char)(0xC0 | encode));
3925   emit_int8(mode & 0xFF);
3926 }
3927 
3928 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
3929   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
3930          vector_len == AVX_256bit? VM_Version::supports_avx2() :
3931          0, "");
3932   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3933   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3934   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3935   emit_int8(0x70);
3936   emit_int8((unsigned char)(0xC0 | encode));
3937   emit_int8(mode & 0xFF);
3938 }
3939 
3940 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
3941   assert(isByte(mode), "invalid value");
3942   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3943   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3944   InstructionMark im(this);
3945   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3946   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3947   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3948   emit_int8(0x70);
3949   emit_operand(dst, src);
3950   emit_int8(mode & 0xFF);
3951 }
3952 
3953 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
3954   assert(isByte(mode), "invalid value");
3955   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3956   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3957   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3958   emit_int8(0x70);
3959   emit_int8((unsigned char)(0xC0 | encode));
3960   emit_int8(mode & 0xFF);
3961 }
3962 
3963 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
3964   assert(isByte(mode), "invalid value");
3965   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3966   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3967   InstructionMark im(this);
3968   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3969   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3970   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3971   emit_int8(0x70);
3972   emit_operand(dst, src);
3973   emit_int8(mode & 0xFF);
3974 }
3975 
3976 void Assembler::psrldq(XMMRegister dst, int shift) {
3977   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
3978   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3979   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3980   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3981   emit_int8(0x73);
3982   emit_int8((unsigned char)(0xC0 | encode));
3983   emit_int8(shift);
3984 }
3985 
3986 void Assembler::pslldq(XMMRegister dst, int shift) {
3987   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
3988   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3989   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3990   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
3991   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3992   emit_int8(0x73);
3993   emit_int8((unsigned char)(0xC0 | encode));
3994   emit_int8(shift);
3995 }
3996 
3997 void Assembler::ptest(XMMRegister dst, Address src) {
3998   assert(VM_Version::supports_sse4_1(), "");
3999   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4000   InstructionMark im(this);
4001   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4002   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4003   emit_int8(0x17);
4004   emit_operand(dst, src);
4005 }
4006 
4007 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
4008   assert(VM_Version::supports_sse4_1(), "");
4009   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4010   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4011   emit_int8(0x17);
4012   emit_int8((unsigned char)(0xC0 | encode));
4013 }
4014 
4015 void Assembler::vptest(XMMRegister dst, Address src) {
4016   assert(VM_Version::supports_avx(), "");
4017   InstructionMark im(this);
4018   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4019   assert(dst != xnoreg, "sanity");
4020   // swap src<->dst for encoding
4021   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4022   emit_int8(0x17);
4023   emit_operand(dst, src);
4024 }
4025 
4026 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
4027   assert(VM_Version::supports_avx(), "");
4028   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4029   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4030   emit_int8(0x17);
4031   emit_int8((unsigned char)(0xC0 | encode));
4032 }
4033 
4034 void Assembler::punpcklbw(XMMRegister dst, Address src) {
4035   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4036   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4037   InstructionMark im(this);
4038   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4039   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4040   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4041   emit_int8(0x60);
4042   emit_operand(dst, src);
4043 }
4044 
4045 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
4046   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4047   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4048   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4049   emit_int8(0x60);
4050   emit_int8((unsigned char)(0xC0 | encode));
4051 }
4052 
4053 void Assembler::punpckldq(XMMRegister dst, Address src) {
4054   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4055   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4056   InstructionMark im(this);
4057   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4058   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4059   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4060   emit_int8(0x62);
4061   emit_operand(dst, src);
4062 }
4063 
4064 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
4065   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4066   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4067   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4068   emit_int8(0x62);
4069   emit_int8((unsigned char)(0xC0 | encode));
4070 }
4071 
4072 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
4073   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4074   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4075   attributes.set_rex_vex_w_reverted();
4076   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4077   emit_int8(0x6C);
4078   emit_int8((unsigned char)(0xC0 | encode));
4079 }
4080 
4081 void Assembler::push(int32_t imm32) {
4082   // in 64bits we push 64bits onto the stack but only
4083   // take a 32bit immediate
4084   emit_int8(0x68);
4085   emit_int32(imm32);
4086 }
4087 
4088 void Assembler::push(Register src) {
4089   int encode = prefix_and_encode(src->encoding());
4090 
4091   emit_int8(0x50 | encode);
4092 }
4093 
4094 void Assembler::pushf() {
4095   emit_int8((unsigned char)0x9C);
4096 }
4097 
4098 #ifndef _LP64 // no 32bit push/pop on amd64
4099 void Assembler::pushl(Address src) {
4100   // Note this will push 64bit on 64bit
4101   InstructionMark im(this);
4102   prefix(src);
4103   emit_int8((unsigned char)0xFF);
4104   emit_operand(rsi, src);
4105 }
4106 #endif
4107 
4108 void Assembler::rcll(Register dst, int imm8) {
4109   assert(isShiftCount(imm8), "illegal shift count");
4110   int encode = prefix_and_encode(dst->encoding());
4111   if (imm8 == 1) {
4112     emit_int8((unsigned char)0xD1);
4113     emit_int8((unsigned char)(0xD0 | encode));
4114   } else {
4115     emit_int8((unsigned char)0xC1);
4116     emit_int8((unsigned char)0xD0 | encode);
4117     emit_int8(imm8);
4118   }
4119 }
4120 
4121 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
4122   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4123   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4124   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4125   emit_int8(0x53);
4126   emit_int8((unsigned char)(0xC0 | encode));
4127 }
4128 
4129 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
4130   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4131   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4132   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4133   emit_int8(0x53);
4134   emit_int8((unsigned char)(0xC0 | encode));
4135 }
4136 
4137 void Assembler::rdtsc() {
4138   emit_int8((unsigned char)0x0F);
4139   emit_int8((unsigned char)0x31);
4140 }
4141 
4142 // copies data from [esi] to [edi] using rcx pointer sized words
4143 // generic
4144 void Assembler::rep_mov() {
4145   emit_int8((unsigned char)0xF3);
4146   // MOVSQ
4147   LP64_ONLY(prefix(REX_W));
4148   emit_int8((unsigned char)0xA5);
4149 }
4150 
4151 // sets rcx bytes with rax, value at [edi]
4152 void Assembler::rep_stosb() {
4153   emit_int8((unsigned char)0xF3); // REP
4154   LP64_ONLY(prefix(REX_W));
4155   emit_int8((unsigned char)0xAA); // STOSB
4156 }
4157 
4158 // sets rcx pointer sized words with rax, value at [edi]
4159 // generic
4160 void Assembler::rep_stos() {
4161   emit_int8((unsigned char)0xF3); // REP
4162   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
4163   emit_int8((unsigned char)0xAB);
4164 }
4165 
4166 // scans rcx pointer sized words at [edi] for occurance of rax,
4167 // generic
4168 void Assembler::repne_scan() { // repne_scan
4169   emit_int8((unsigned char)0xF2);
4170   // SCASQ
4171   LP64_ONLY(prefix(REX_W));
4172   emit_int8((unsigned char)0xAF);
4173 }
4174 
4175 #ifdef _LP64
4176 // scans rcx 4 byte words at [edi] for occurance of rax,
4177 // generic
4178 void Assembler::repne_scanl() { // repne_scan
4179   emit_int8((unsigned char)0xF2);
4180   // SCASL
4181   emit_int8((unsigned char)0xAF);
4182 }
4183 #endif
4184 
4185 void Assembler::ret(int imm16) {
4186   if (imm16 == 0) {
4187     emit_int8((unsigned char)0xC3);
4188   } else {
4189     emit_int8((unsigned char)0xC2);
4190     emit_int16(imm16);
4191   }
4192 }
4193 
4194 void Assembler::sahf() {
4195 #ifdef _LP64
4196   // Not supported in 64bit mode
4197   ShouldNotReachHere();
4198 #endif
4199   emit_int8((unsigned char)0x9E);
4200 }
4201 
4202 void Assembler::sarl(Register dst, int imm8) {
4203   int encode = prefix_and_encode(dst->encoding());
4204   assert(isShiftCount(imm8), "illegal shift count");
4205   if (imm8 == 1) {
4206     emit_int8((unsigned char)0xD1);
4207     emit_int8((unsigned char)(0xF8 | encode));
4208   } else {
4209     emit_int8((unsigned char)0xC1);
4210     emit_int8((unsigned char)(0xF8 | encode));
4211     emit_int8(imm8);
4212   }
4213 }
4214 
4215 void Assembler::sarl(Register dst) {
4216   int encode = prefix_and_encode(dst->encoding());
4217   emit_int8((unsigned char)0xD3);
4218   emit_int8((unsigned char)(0xF8 | encode));
4219 }
4220 
4221 void Assembler::sbbl(Address dst, int32_t imm32) {
4222   InstructionMark im(this);
4223   prefix(dst);
4224   emit_arith_operand(0x81, rbx, dst, imm32);
4225 }
4226 
4227 void Assembler::sbbl(Register dst, int32_t imm32) {
4228   prefix(dst);
4229   emit_arith(0x81, 0xD8, dst, imm32);
4230 }
4231 
4232 
4233 void Assembler::sbbl(Register dst, Address src) {
4234   InstructionMark im(this);
4235   prefix(src, dst);
4236   emit_int8(0x1B);
4237   emit_operand(dst, src);
4238 }
4239 
4240 void Assembler::sbbl(Register dst, Register src) {
4241   (void) prefix_and_encode(dst->encoding(), src->encoding());
4242   emit_arith(0x1B, 0xC0, dst, src);
4243 }
4244 
4245 void Assembler::setb(Condition cc, Register dst) {
4246   assert(0 <= cc && cc < 16, "illegal cc");
4247   int encode = prefix_and_encode(dst->encoding(), true);
4248   emit_int8(0x0F);
4249   emit_int8((unsigned char)0x90 | cc);
4250   emit_int8((unsigned char)(0xC0 | encode));
4251 }
4252 
4253 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
4254   assert(VM_Version::supports_ssse3(), "");
4255   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
4256   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4257   emit_int8((unsigned char)0x0F);
4258   emit_int8((unsigned char)(0xC0 | encode));
4259   emit_int8(imm8);
4260 }
4261 
4262 void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4263   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4264          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4265          0, "");
4266   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
4267   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4268   emit_int8((unsigned char)0x0F);
4269   emit_int8((unsigned char)(0xC0 | encode));
4270   emit_int8(imm8);
4271 }
4272 
4273 void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
4274   assert(VM_Version::supports_sse4_1(), "");
4275   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4276   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4277   emit_int8((unsigned char)0x0E);
4278   emit_int8((unsigned char)(0xC0 | encode));
4279   emit_int8(imm8);
4280 }
4281 
4282 void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
4283   assert(VM_Version::supports_sha(), "");
4284   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4285   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
4286   emit_int8((unsigned char)0xCC);
4287   emit_int8((unsigned char)(0xC0 | encode));
4288   emit_int8((unsigned char)imm8);
4289 }
4290 
4291 void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
4292   assert(VM_Version::supports_sha(), "");
4293   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4294   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
4295   emit_int8((unsigned char)0xC8);
4296   emit_int8((unsigned char)(0xC0 | encode));
4297 }
4298 
4299 void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
4300   assert(VM_Version::supports_sha(), "");
4301   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4302   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
4303   emit_int8((unsigned char)0xC9);
4304   emit_int8((unsigned char)(0xC0 | encode));
4305 }
4306 
4307 void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
4308   assert(VM_Version::supports_sha(), "");
4309   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4310   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
4311   emit_int8((unsigned char)0xCA);
4312   emit_int8((unsigned char)(0xC0 | encode));
4313 }
4314 
4315 // xmm0 is implicit additional source to this instruction.
4316 void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
4317   assert(VM_Version::supports_sha(), "");
4318   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4319   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
4320   emit_int8((unsigned char)0xCB);
4321   emit_int8((unsigned char)(0xC0 | encode));
4322 }
4323 
4324 void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
4325   assert(VM_Version::supports_sha(), "");
4326   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4327   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
4328   emit_int8((unsigned char)0xCC);
4329   emit_int8((unsigned char)(0xC0 | encode));
4330 }
4331 
4332 void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
4333   assert(VM_Version::supports_sha(), "");
4334   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4335   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
4336   emit_int8((unsigned char)0xCD);
4337   emit_int8((unsigned char)(0xC0 | encode));
4338 }
4339 
4340 
4341 void Assembler::shll(Register dst, int imm8) {
4342   assert(isShiftCount(imm8), "illegal shift count");
4343   int encode = prefix_and_encode(dst->encoding());
4344   if (imm8 == 1 ) {
4345     emit_int8((unsigned char)0xD1);
4346     emit_int8((unsigned char)(0xE0 | encode));
4347   } else {
4348     emit_int8((unsigned char)0xC1);
4349     emit_int8((unsigned char)(0xE0 | encode));
4350     emit_int8(imm8);
4351   }
4352 }
4353 
4354 void Assembler::shll(Register dst) {
4355   int encode = prefix_and_encode(dst->encoding());
4356   emit_int8((unsigned char)0xD3);
4357   emit_int8((unsigned char)(0xE0 | encode));
4358 }
4359 
4360 void Assembler::shrl(Register dst, int imm8) {
4361   assert(isShiftCount(imm8), "illegal shift count");
4362   int encode = prefix_and_encode(dst->encoding());
4363   emit_int8((unsigned char)0xC1);
4364   emit_int8((unsigned char)(0xE8 | encode));
4365   emit_int8(imm8);
4366 }
4367 
4368 void Assembler::shrl(Register dst) {
4369   int encode = prefix_and_encode(dst->encoding());
4370   emit_int8((unsigned char)0xD3);
4371   emit_int8((unsigned char)(0xE8 | encode));
4372 }
4373 
4374 // copies a single word from [esi] to [edi]
4375 void Assembler::smovl() {
4376   emit_int8((unsigned char)0xA5);
4377 }
4378 
4379 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
4380   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4381   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4382   attributes.set_rex_vex_w_reverted();
4383   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4384   emit_int8(0x51);
4385   emit_int8((unsigned char)(0xC0 | encode));
4386 }
4387 
4388 void Assembler::sqrtsd(XMMRegister dst, Address src) {
4389   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4390   InstructionMark im(this);
4391   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4392   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4393   attributes.set_rex_vex_w_reverted();
4394   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4395   emit_int8(0x51);
4396   emit_operand(dst, src);
4397 }
4398 
4399 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
4400   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4401   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4402   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4403   emit_int8(0x51);
4404   emit_int8((unsigned char)(0xC0 | encode));
4405 }
4406 
4407 void Assembler::std() {
4408   emit_int8((unsigned char)0xFD);
4409 }
4410 
4411 void Assembler::sqrtss(XMMRegister dst, Address src) {
4412   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4413   InstructionMark im(this);
4414   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4415   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4416   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4417   emit_int8(0x51);
4418   emit_operand(dst, src);
4419 }
4420 
4421 void Assembler::stmxcsr( Address dst) {
4422   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4423   InstructionMark im(this);
4424   prefix(dst);
4425   emit_int8(0x0F);
4426   emit_int8((unsigned char)0xAE);
4427   emit_operand(as_Register(3), dst);
4428 }
4429 
4430 void Assembler::subl(Address dst, int32_t imm32) {
4431   InstructionMark im(this);
4432   prefix(dst);
4433   emit_arith_operand(0x81, rbp, dst, imm32);
4434 }
4435 
4436 void Assembler::subl(Address dst, Register src) {
4437   InstructionMark im(this);
4438   prefix(dst, src);
4439   emit_int8(0x29);
4440   emit_operand(src, dst);
4441 }
4442 
4443 void Assembler::subl(Register dst, int32_t imm32) {
4444   prefix(dst);
4445   emit_arith(0x81, 0xE8, dst, imm32);
4446 }
4447 
4448 // Force generation of a 4 byte immediate value even if it fits into 8bit
4449 void Assembler::subl_imm32(Register dst, int32_t imm32) {
4450   prefix(dst);
4451   emit_arith_imm32(0x81, 0xE8, dst, imm32);
4452 }
4453 
4454 void Assembler::subl(Register dst, Address src) {
4455   InstructionMark im(this);
4456   prefix(src, dst);
4457   emit_int8(0x2B);
4458   emit_operand(dst, src);
4459 }
4460 
4461 void Assembler::subl(Register dst, Register src) {
4462   (void) prefix_and_encode(dst->encoding(), src->encoding());
4463   emit_arith(0x2B, 0xC0, dst, src);
4464 }
4465 
4466 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
4467   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4468   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4469   attributes.set_rex_vex_w_reverted();
4470   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4471   emit_int8(0x5C);
4472   emit_int8((unsigned char)(0xC0 | encode));
4473 }
4474 
4475 void Assembler::subsd(XMMRegister dst, Address src) {
4476   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4477   InstructionMark im(this);
4478   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4479   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4480   attributes.set_rex_vex_w_reverted();
4481   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4482   emit_int8(0x5C);
4483   emit_operand(dst, src);
4484 }
4485 
4486 void Assembler::subss(XMMRegister dst, XMMRegister src) {
4487   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4488   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ false);
4489   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4490   emit_int8(0x5C);
4491   emit_int8((unsigned char)(0xC0 | encode));
4492 }
4493 
4494 void Assembler::subss(XMMRegister dst, Address src) {
4495   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4496   InstructionMark im(this);
4497   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4498   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4499   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4500   emit_int8(0x5C);
4501   emit_operand(dst, src);
4502 }
4503 
4504 void Assembler::testb(Register dst, int imm8) {
4505   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
4506   (void) prefix_and_encode(dst->encoding(), true);
4507   emit_arith_b(0xF6, 0xC0, dst, imm8);
4508 }
4509 
4510 void Assembler::testb(Address dst, int imm8) {
4511   InstructionMark im(this);
4512   prefix(dst);
4513   emit_int8((unsigned char)0xF6);
4514   emit_operand(rax, dst, 1);
4515   emit_int8(imm8);
4516 }
4517 
4518 void Assembler::testl(Register dst, int32_t imm32) {
4519   // not using emit_arith because test
4520   // doesn't support sign-extension of
4521   // 8bit operands
4522   int encode = dst->encoding();
4523   if (encode == 0) {
4524     emit_int8((unsigned char)0xA9);
4525   } else {
4526     encode = prefix_and_encode(encode);
4527     emit_int8((unsigned char)0xF7);
4528     emit_int8((unsigned char)(0xC0 | encode));
4529   }
4530   emit_int32(imm32);
4531 }
4532 
4533 void Assembler::testl(Register dst, Register src) {
4534   (void) prefix_and_encode(dst->encoding(), src->encoding());
4535   emit_arith(0x85, 0xC0, dst, src);
4536 }
4537 
4538 void Assembler::testl(Register dst, Address src) {
4539   InstructionMark im(this);
4540   prefix(src, dst);
4541   emit_int8((unsigned char)0x85);
4542   emit_operand(dst, src);
4543 }
4544 
4545 void Assembler::tzcntl(Register dst, Register src) {
4546   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4547   emit_int8((unsigned char)0xF3);
4548   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4549   emit_int8(0x0F);
4550   emit_int8((unsigned char)0xBC);
4551   emit_int8((unsigned char)0xC0 | encode);
4552 }
4553 
4554 void Assembler::tzcntq(Register dst, Register src) {
4555   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4556   emit_int8((unsigned char)0xF3);
4557   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4558   emit_int8(0x0F);
4559   emit_int8((unsigned char)0xBC);
4560   emit_int8((unsigned char)(0xC0 | encode));
4561 }
4562 
4563 void Assembler::ucomisd(XMMRegister dst, Address src) {
4564   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4565   InstructionMark im(this);
4566   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4567   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4568   attributes.set_rex_vex_w_reverted();
4569   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4570   emit_int8(0x2E);
4571   emit_operand(dst, src);
4572 }
4573 
4574 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
4575   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4576   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4577   attributes.set_rex_vex_w_reverted();
4578   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4579   emit_int8(0x2E);
4580   emit_int8((unsigned char)(0xC0 | encode));
4581 }
4582 
4583 void Assembler::ucomiss(XMMRegister dst, Address src) {
4584   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4585   InstructionMark im(this);
4586   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4587   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4588   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4589   emit_int8(0x2E);
4590   emit_operand(dst, src);
4591 }
4592 
4593 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
4594   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4595   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4596   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4597   emit_int8(0x2E);
4598   emit_int8((unsigned char)(0xC0 | encode));
4599 }
4600 
4601 void Assembler::xabort(int8_t imm8) {
4602   emit_int8((unsigned char)0xC6);
4603   emit_int8((unsigned char)0xF8);
4604   emit_int8((unsigned char)(imm8 & 0xFF));
4605 }
4606 
4607 void Assembler::xaddb(Address dst, Register src) {
4608   InstructionMark im(this);
4609   prefix(dst, src, true);
4610   emit_int8(0x0F);
4611   emit_int8((unsigned char)0xC0);
4612   emit_operand(src, dst);
4613 }
4614 
4615 void Assembler::xaddw(Address dst, Register src) {
4616   InstructionMark im(this);
4617   emit_int8(0x66);
4618   prefix(dst, src);
4619   emit_int8(0x0F);
4620   emit_int8((unsigned char)0xC1);
4621   emit_operand(src, dst);
4622 }
4623 
4624 void Assembler::xaddl(Address dst, Register src) {
4625   InstructionMark im(this);
4626   prefix(dst, src);
4627   emit_int8(0x0F);
4628   emit_int8((unsigned char)0xC1);
4629   emit_operand(src, dst);
4630 }
4631 
4632 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
4633   InstructionMark im(this);
4634   relocate(rtype);
4635   if (abort.is_bound()) {
4636     address entry = target(abort);
4637     assert(entry != NULL, "abort entry NULL");
4638     intptr_t offset = entry - pc();
4639     emit_int8((unsigned char)0xC7);
4640     emit_int8((unsigned char)0xF8);
4641     emit_int32(offset - 6); // 2 opcode + 4 address
4642   } else {
4643     abort.add_patch_at(code(), locator());
4644     emit_int8((unsigned char)0xC7);
4645     emit_int8((unsigned char)0xF8);
4646     emit_int32(0);
4647   }
4648 }
4649 
4650 void Assembler::xchgb(Register dst, Address src) { // xchg
4651   InstructionMark im(this);
4652   prefix(src, dst, true);
4653   emit_int8((unsigned char)0x86);
4654   emit_operand(dst, src);
4655 }
4656 
4657 void Assembler::xchgw(Register dst, Address src) { // xchg
4658   InstructionMark im(this);
4659   emit_int8(0x66);
4660   prefix(src, dst);
4661   emit_int8((unsigned char)0x87);
4662   emit_operand(dst, src);
4663 }
4664 
4665 void Assembler::xchgl(Register dst, Address src) { // xchg
4666   InstructionMark im(this);
4667   prefix(src, dst);
4668   emit_int8((unsigned char)0x87);
4669   emit_operand(dst, src);
4670 }
4671 
4672 void Assembler::xchgl(Register dst, Register src) {
4673   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4674   emit_int8((unsigned char)0x87);
4675   emit_int8((unsigned char)(0xC0 | encode));
4676 }
4677 
4678 void Assembler::xend() {
4679   emit_int8((unsigned char)0x0F);
4680   emit_int8((unsigned char)0x01);
4681   emit_int8((unsigned char)0xD5);
4682 }
4683 
4684 void Assembler::xgetbv() {
4685   emit_int8(0x0F);
4686   emit_int8(0x01);
4687   emit_int8((unsigned char)0xD0);
4688 }
4689 
4690 void Assembler::xorl(Register dst, int32_t imm32) {
4691   prefix(dst);
4692   emit_arith(0x81, 0xF0, dst, imm32);
4693 }
4694 
4695 void Assembler::xorl(Register dst, Address src) {
4696   InstructionMark im(this);
4697   prefix(src, dst);
4698   emit_int8(0x33);
4699   emit_operand(dst, src);
4700 }
4701 
4702 void Assembler::xorl(Register dst, Register src) {
4703   (void) prefix_and_encode(dst->encoding(), src->encoding());
4704   emit_arith(0x33, 0xC0, dst, src);
4705 }
4706 
4707 void Assembler::xorb(Register dst, Address src) {
4708   InstructionMark im(this);
4709   prefix(src, dst);
4710   emit_int8(0x32);
4711   emit_operand(dst, src);
4712 }
4713 
// AVX 3-operand scalar floating-point arithmetic instructions
4715 
4716 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
4717   assert(VM_Version::supports_avx(), "");
4718   InstructionMark im(this);
4719   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4720   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4721   attributes.set_rex_vex_w_reverted();
4722   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4723   emit_int8(0x58);
4724   emit_operand(dst, src);
4725 }
4726 
4727 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4728   assert(VM_Version::supports_avx(), "");
4729   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4730   attributes.set_rex_vex_w_reverted();
4731   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4732   emit_int8(0x58);
4733   emit_int8((unsigned char)(0xC0 | encode));
4734 }
4735 
4736 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
4737   assert(VM_Version::supports_avx(), "");
4738   InstructionMark im(this);
4739   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4740   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4741   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4742   emit_int8(0x58);
4743   emit_operand(dst, src);
4744 }
4745 
4746 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4747   assert(VM_Version::supports_avx(), "");
4748   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4749   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4750   emit_int8(0x58);
4751   emit_int8((unsigned char)(0xC0 | encode));
4752 }
4753 
4754 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
4755   assert(VM_Version::supports_avx(), "");
4756   InstructionMark im(this);
4757   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4758   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4759   attributes.set_rex_vex_w_reverted();
4760   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4761   emit_int8(0x5E);
4762   emit_operand(dst, src);
4763 }
4764 
4765 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4766   assert(VM_Version::supports_avx(), "");
4767   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4768   attributes.set_rex_vex_w_reverted();
4769   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4770   emit_int8(0x5E);
4771   emit_int8((unsigned char)(0xC0 | encode));
4772 }
4773 
4774 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
4775   assert(VM_Version::supports_avx(), "");
4776   InstructionMark im(this);
4777   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4778   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4779   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4780   emit_int8(0x5E);
4781   emit_operand(dst, src);
4782 }
4783 
4784 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4785   assert(VM_Version::supports_avx(), "");
4786   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4787   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4788   emit_int8(0x5E);
4789   emit_int8((unsigned char)(0xC0 | encode));
4790 }
4791 
4792 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
4793   assert(VM_Version::supports_avx(), "");
4794   InstructionMark im(this);
4795   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4796   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4797   attributes.set_rex_vex_w_reverted();
4798   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4799   emit_int8(0x59);
4800   emit_operand(dst, src);
4801 }
4802 
4803 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4804   assert(VM_Version::supports_avx(), "");
4805   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4806   attributes.set_rex_vex_w_reverted();
4807   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4808   emit_int8(0x59);
4809   emit_int8((unsigned char)(0xC0 | encode));
4810 }
4811 
4812 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
4813   assert(VM_Version::supports_avx(), "");
4814   InstructionMark im(this);
4815   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4816   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4817   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4818   emit_int8(0x59);
4819   emit_operand(dst, src);
4820 }
4821 
4822 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4823   assert(VM_Version::supports_avx(), "");
4824   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4825   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4826   emit_int8(0x59);
4827   emit_int8((unsigned char)(0xC0 | encode));
4828 }
4829 
4830 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
4831   assert(VM_Version::supports_avx(), "");
4832   InstructionMark im(this);
4833   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4834   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4835   attributes.set_rex_vex_w_reverted();
4836   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4837   emit_int8(0x5C);
4838   emit_operand(dst, src);
4839 }
4840 
4841 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4842   assert(VM_Version::supports_avx(), "");
4843   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4844   attributes.set_rex_vex_w_reverted();
4845   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4846   emit_int8(0x5C);
4847   emit_int8((unsigned char)(0xC0 | encode));
4848 }
4849 
4850 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
4851   assert(VM_Version::supports_avx(), "");
4852   InstructionMark im(this);
4853   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4854   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4855   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4856   emit_int8(0x5C);
4857   emit_operand(dst, src);
4858 }
4859 
4860 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4861   assert(VM_Version::supports_avx(), "");
4862   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4863   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4864   emit_int8(0x5C);
4865   emit_int8((unsigned char)(0xC0 | encode));
4866 }
4867 
4868 //====================VECTOR ARITHMETIC=====================================
4869 
// Floating-point vector arithmetic
4871 
4872 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
4873   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4874   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4875   attributes.set_rex_vex_w_reverted();
4876   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4877   emit_int8(0x58);
4878   emit_int8((unsigned char)(0xC0 | encode));
4879 }
4880 
4881 void Assembler::addpd(XMMRegister dst, Address src) {
4882   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4883   InstructionMark im(this);
4884   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4885   attributes.set_rex_vex_w_reverted();
4886   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
4887   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4888   emit_int8(0x58);
4889   emit_operand(dst, src);
4890 }
4891 
4892 
4893 void Assembler::addps(XMMRegister dst, XMMRegister src) {
4894   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4895   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4896   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4897   emit_int8(0x58);
4898   emit_int8((unsigned char)(0xC0 | encode));
4899 }
4900 
4901 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4902   assert(VM_Version::supports_avx(), "");
4903   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4904   attributes.set_rex_vex_w_reverted();
4905   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4906   emit_int8(0x58);
4907   emit_int8((unsigned char)(0xC0 | encode));
4908 }
4909 
4910 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4911   assert(VM_Version::supports_avx(), "");
4912   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4913   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4914   emit_int8(0x58);
4915   emit_int8((unsigned char)(0xC0 | encode));
4916 }
4917 
4918 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4919   assert(VM_Version::supports_avx(), "");
4920   InstructionMark im(this);
4921   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4922   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
4923   attributes.set_rex_vex_w_reverted();
4924   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4925   emit_int8(0x58);
4926   emit_operand(dst, src);
4927 }
4928 
4929 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4930   assert(VM_Version::supports_avx(), "");
4931   InstructionMark im(this);
4932   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4933   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4934   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4935   emit_int8(0x58);
4936   emit_operand(dst, src);
4937 }
4938 
4939 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
4940   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4941   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4942   attributes.set_rex_vex_w_reverted();
4943   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4944   emit_int8(0x5C);
4945   emit_int8((unsigned char)(0xC0 | encode));
4946 }
4947 
4948 void Assembler::subps(XMMRegister dst, XMMRegister src) {
4949   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4950   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4951   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4952   emit_int8(0x5C);
4953   emit_int8((unsigned char)(0xC0 | encode));
4954 }
4955 
4956 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4957   assert(VM_Version::supports_avx(), "");
4958   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4959   attributes.set_rex_vex_w_reverted();
4960   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4961   emit_int8(0x5C);
4962   emit_int8((unsigned char)(0xC0 | encode));
4963 }
4964 
4965 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4966   assert(VM_Version::supports_avx(), "");
4967   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4968   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4969   emit_int8(0x5C);
4970   emit_int8((unsigned char)(0xC0 | encode));
4971 }
4972 
4973 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4974   assert(VM_Version::supports_avx(), "");
4975   InstructionMark im(this);
4976   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4977   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
4978   attributes.set_rex_vex_w_reverted();
4979   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4980   emit_int8(0x5C);
4981   emit_operand(dst, src);
4982 }
4983 
4984 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4985   assert(VM_Version::supports_avx(), "");
4986   InstructionMark im(this);
4987   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4988   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4989   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4990   emit_int8(0x5C);
4991   emit_operand(dst, src);
4992 }
4993 
4994 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
4995   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4996   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4997   attributes.set_rex_vex_w_reverted();
4998   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4999   emit_int8(0x59);
5000   emit_int8((unsigned char)(0xC0 | encode));
5001 }
5002 
5003 void Assembler::mulpd(XMMRegister dst, Address src) {
5004   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5005   InstructionMark im(this);
5006   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5007   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5008   attributes.set_rex_vex_w_reverted();
5009   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5010   emit_int8(0x59);
5011   emit_operand(dst, src);
5012 }
5013 
5014 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
5015   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5016   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5017   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5018   emit_int8(0x59);
5019   emit_int8((unsigned char)(0xC0 | encode));
5020 }
5021 
5022 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5023   assert(VM_Version::supports_avx(), "");
5024   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5025   attributes.set_rex_vex_w_reverted();
5026   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5027   emit_int8(0x59);
5028   emit_int8((unsigned char)(0xC0 | encode));
5029 }
5030 
5031 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5032   assert(VM_Version::supports_avx(), "");
5033   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5034   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5035   emit_int8(0x59);
5036   emit_int8((unsigned char)(0xC0 | encode));
5037 }
5038 
5039 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5040   assert(VM_Version::supports_avx(), "");
5041   InstructionMark im(this);
5042   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5043   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5044   attributes.set_rex_vex_w_reverted();
5045   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5046   emit_int8(0x59);
5047   emit_operand(dst, src);
5048 }
5049 
5050 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5051   assert(VM_Version::supports_avx(), "");
5052   InstructionMark im(this);
5053   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5054   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5055   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5056   emit_int8(0x59);
5057   emit_operand(dst, src);
5058 }
5059 
5060 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
5061   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5062   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5063   attributes.set_rex_vex_w_reverted();
5064   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5065   emit_int8(0x5E);
5066   emit_int8((unsigned char)(0xC0 | encode));
5067 }
5068 
5069 void Assembler::divps(XMMRegister dst, XMMRegister src) {
5070   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5071   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5072   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5073   emit_int8(0x5E);
5074   emit_int8((unsigned char)(0xC0 | encode));
5075 }
5076 
5077 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5078   assert(VM_Version::supports_avx(), "");
5079   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5080   attributes.set_rex_vex_w_reverted();
5081   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5082   emit_int8(0x5E);
5083   emit_int8((unsigned char)(0xC0 | encode));
5084 }
5085 
5086 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5087   assert(VM_Version::supports_avx(), "");
5088   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5089   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5090   emit_int8(0x5E);
5091   emit_int8((unsigned char)(0xC0 | encode));
5092 }
5093 
5094 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5095   assert(VM_Version::supports_avx(), "");
5096   InstructionMark im(this);
5097   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5098   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5099   attributes.set_rex_vex_w_reverted();
5100   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5101   emit_int8(0x5E);
5102   emit_operand(dst, src);
5103 }
5104 
5105 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5106   assert(VM_Version::supports_avx(), "");
5107   InstructionMark im(this);
5108   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5109   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5110   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5111   emit_int8(0x5E);
5112   emit_operand(dst, src);
5113 }
5114 
5115 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
5116   assert(VM_Version::supports_avx(), "");
5117   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5118   attributes.set_rex_vex_w_reverted();
5119   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5120   emit_int8(0x51);
5121   emit_int8((unsigned char)(0xC0 | encode));
5122 }
5123 
5124 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
5125   assert(VM_Version::supports_avx(), "");
5126   InstructionMark im(this);
5127   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5128   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5129   attributes.set_rex_vex_w_reverted();
5130   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5131   emit_int8(0x51);
5132   emit_operand(dst, src);
5133 }
5134 
5135 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
5136   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5137   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5138   attributes.set_rex_vex_w_reverted();
5139   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5140   emit_int8(0x54);
5141   emit_int8((unsigned char)(0xC0 | encode));
5142 }
5143 
5144 void Assembler::andps(XMMRegister dst, XMMRegister src) {
5145   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5146   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5147   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5148   emit_int8(0x54);
5149   emit_int8((unsigned char)(0xC0 | encode));
5150 }
5151 
5152 void Assembler::andps(XMMRegister dst, Address src) {
5153   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5154   InstructionMark im(this);
5155   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5156   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5157   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5158   emit_int8(0x54);
5159   emit_operand(dst, src);
5160 }
5161 
5162 void Assembler::andpd(XMMRegister dst, Address src) {
5163   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5164   InstructionMark im(this);
5165   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5166   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5167   attributes.set_rex_vex_w_reverted();
5168   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5169   emit_int8(0x54);
5170   emit_operand(dst, src);
5171 }
5172 
5173 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5174   assert(VM_Version::supports_avx(), "");
5175   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5176   attributes.set_rex_vex_w_reverted();
5177   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5178   emit_int8(0x54);
5179   emit_int8((unsigned char)(0xC0 | encode));
5180 }
5181 
5182 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5183   assert(VM_Version::supports_avx(), "");
5184   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5185   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5186   emit_int8(0x54);
5187   emit_int8((unsigned char)(0xC0 | encode));
5188 }
5189 
5190 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5191   assert(VM_Version::supports_avx(), "");
5192   InstructionMark im(this);
5193   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5194   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5195   attributes.set_rex_vex_w_reverted();
5196   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5197   emit_int8(0x54);
5198   emit_operand(dst, src);
5199 }
5200 
5201 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5202   assert(VM_Version::supports_avx(), "");
5203   InstructionMark im(this);
5204   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5205   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5206   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5207   emit_int8(0x54);
5208   emit_operand(dst, src);
5209 }
5210 
5211 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
5212   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5213   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5214   attributes.set_rex_vex_w_reverted();
5215   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5216   emit_int8(0x15);
5217   emit_int8((unsigned char)(0xC0 | encode));
5218 }
5219 
5220 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
5221   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5222   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5223   attributes.set_rex_vex_w_reverted();
5224   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5225   emit_int8(0x14);
5226   emit_int8((unsigned char)(0xC0 | encode));
5227 }
5228 
5229 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
5230   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5231   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5232   attributes.set_rex_vex_w_reverted();
5233   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5234   emit_int8(0x57);
5235   emit_int8((unsigned char)(0xC0 | encode));
5236 }
5237 
5238 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
5239   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5240   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5241   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5242   emit_int8(0x57);
5243   emit_int8((unsigned char)(0xC0 | encode));
5244 }
5245 
5246 void Assembler::xorpd(XMMRegister dst, Address src) {
5247   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5248   InstructionMark im(this);
5249   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5250   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5251   attributes.set_rex_vex_w_reverted();
5252   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5253   emit_int8(0x57);
5254   emit_operand(dst, src);
5255 }
5256 
5257 void Assembler::xorps(XMMRegister dst, Address src) {
5258   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5259   InstructionMark im(this);
5260   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5261   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5262   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5263   emit_int8(0x57);
5264   emit_operand(dst, src);
5265 }
5266 
5267 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5268   assert(VM_Version::supports_avx(), "");
5269   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5270   attributes.set_rex_vex_w_reverted();
5271   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5272   emit_int8(0x57);
5273   emit_int8((unsigned char)(0xC0 | encode));
5274 }
5275 
5276 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5277   assert(VM_Version::supports_avx(), "");
5278   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5279   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5280   emit_int8(0x57);
5281   emit_int8((unsigned char)(0xC0 | encode));
5282 }
5283 
5284 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5285   assert(VM_Version::supports_avx(), "");
5286   InstructionMark im(this);
5287   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5288   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5289   attributes.set_rex_vex_w_reverted();
5290   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5291   emit_int8(0x57);
5292   emit_operand(dst, src);
5293 }
5294 
5295 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5296   assert(VM_Version::supports_avx(), "");
5297   InstructionMark im(this);
5298   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5299   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5300   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5301   emit_int8(0x57);
5302   emit_operand(dst, src);
5303 }
5304 
5305 // Integer vector arithmetic
5306 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5307   assert(VM_Version::supports_avx() && (vector_len == 0) ||
5308          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
5309   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5310   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5311   emit_int8(0x01);
5312   emit_int8((unsigned char)(0xC0 | encode));
5313 }
5314 
5315 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5316   assert(VM_Version::supports_avx() && (vector_len == 0) ||
5317          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
5318   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5319   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5320   emit_int8(0x02);
5321   emit_int8((unsigned char)(0xC0 | encode));
5322 }
5323 
5324 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
5325   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5326   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5327   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5328   emit_int8((unsigned char)0xFC);
5329   emit_int8((unsigned char)(0xC0 | encode));
5330 }
5331 
5332 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
5333   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5334   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5335   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5336   emit_int8((unsigned char)0xFD);
5337   emit_int8((unsigned char)(0xC0 | encode));
5338 }
5339 
5340 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
5341   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5342   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5343   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5344   emit_int8((unsigned char)0xFE);
5345   emit_int8((unsigned char)(0xC0 | encode));
5346 }
5347 
5348 void Assembler::paddd(XMMRegister dst, Address src) {
5349   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5350   InstructionMark im(this);
5351   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5352   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5353   emit_int8((unsigned char)0xFE);
5354   emit_operand(dst, src);
5355 }
5356 
5357 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
5358   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5359   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5360   attributes.set_rex_vex_w_reverted();
5361   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5362   emit_int8((unsigned char)0xD4);
5363   emit_int8((unsigned char)(0xC0 | encode));
5364 }
5365 
5366 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
5367   assert(VM_Version::supports_sse3(), "");
5368   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5369   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5370   emit_int8(0x01);
5371   emit_int8((unsigned char)(0xC0 | encode));
5372 }
5373 
5374 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
5375   assert(VM_Version::supports_sse3(), "");
5376   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5377   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5378   emit_int8(0x02);
5379   emit_int8((unsigned char)(0xC0 | encode));
5380 }
5381 
5382 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5383   assert(UseAVX > 0, "requires some form of AVX");
5384   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5385   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5386   emit_int8((unsigned char)0xFC);
5387   emit_int8((unsigned char)(0xC0 | encode));
5388 }
5389 
5390 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5391   assert(UseAVX > 0, "requires some form of AVX");
5392   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5393   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5394   emit_int8((unsigned char)0xFD);
5395   emit_int8((unsigned char)(0xC0 | encode));
5396 }
5397 
5398 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5399   assert(UseAVX > 0, "requires some form of AVX");
5400   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5401   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5402   emit_int8((unsigned char)0xFE);
5403   emit_int8((unsigned char)(0xC0 | encode));
5404 }
5405 
5406 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5407   assert(UseAVX > 0, "requires some form of AVX");
5408   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5409   attributes.set_rex_vex_w_reverted();
5410   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5411   emit_int8((unsigned char)0xD4);
5412   emit_int8((unsigned char)(0xC0 | encode));
5413 }
5414 
5415 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5416   assert(UseAVX > 0, "requires some form of AVX");
5417   InstructionMark im(this);
5418   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5419   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5420   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5421   emit_int8((unsigned char)0xFC);
5422   emit_operand(dst, src);
5423 }
5424 
5425 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5426   assert(UseAVX > 0, "requires some form of AVX");
5427   InstructionMark im(this);
5428   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5429   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5430   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5431   emit_int8((unsigned char)0xFD);
5432   emit_operand(dst, src);
5433 }
5434 
5435 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5436   assert(UseAVX > 0, "requires some form of AVX");
5437   InstructionMark im(this);
5438   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5439   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5440   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5441   emit_int8((unsigned char)0xFE);
5442   emit_operand(dst, src);
5443 }
5444 
5445 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5446   assert(UseAVX > 0, "requires some form of AVX");
5447   InstructionMark im(this);
5448   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5449   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5450   attributes.set_rex_vex_w_reverted();
5451   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5452   emit_int8((unsigned char)0xD4);
5453   emit_operand(dst, src);
5454 }
5455 
5456 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
5457   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5458   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5459   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5460   emit_int8((unsigned char)0xF8);
5461   emit_int8((unsigned char)(0xC0 | encode));
5462 }
5463 
5464 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
5465   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5466   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5467   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5468   emit_int8((unsigned char)0xF9);
5469   emit_int8((unsigned char)(0xC0 | encode));
5470 }
5471 
5472 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
5473   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5474   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5475   emit_int8((unsigned char)0xFA);
5476   emit_int8((unsigned char)(0xC0 | encode));
5477 }
5478 
5479 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
5480   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5481   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5482   attributes.set_rex_vex_w_reverted();
5483   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5484   emit_int8((unsigned char)0xFB);
5485   emit_int8((unsigned char)(0xC0 | encode));
5486 }
5487 
5488 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5489   assert(UseAVX > 0, "requires some form of AVX");
5490   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5491   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5492   emit_int8((unsigned char)0xF8);
5493   emit_int8((unsigned char)(0xC0 | encode));
5494 }
5495 
5496 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5497   assert(UseAVX > 0, "requires some form of AVX");
5498   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5499   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5500   emit_int8((unsigned char)0xF9);
5501   emit_int8((unsigned char)(0xC0 | encode));
5502 }
5503 
5504 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5505   assert(UseAVX > 0, "requires some form of AVX");
5506   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5507   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5508   emit_int8((unsigned char)0xFA);
5509   emit_int8((unsigned char)(0xC0 | encode));
5510 }
5511 
5512 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5513   assert(UseAVX > 0, "requires some form of AVX");
5514   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5515   attributes.set_rex_vex_w_reverted();
5516   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5517   emit_int8((unsigned char)0xFB);
5518   emit_int8((unsigned char)(0xC0 | encode));
5519 }
5520 
5521 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5522   assert(UseAVX > 0, "requires some form of AVX");
5523   InstructionMark im(this);
5524   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5525   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5526   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5527   emit_int8((unsigned char)0xF8);
5528   emit_operand(dst, src);
5529 }
5530 
5531 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5532   assert(UseAVX > 0, "requires some form of AVX");
5533   InstructionMark im(this);
5534   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5535   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5536   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5537   emit_int8((unsigned char)0xF9);
5538   emit_operand(dst, src);
5539 }
5540 
5541 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5542   assert(UseAVX > 0, "requires some form of AVX");
5543   InstructionMark im(this);
5544   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5545   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5546   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5547   emit_int8((unsigned char)0xFA);
5548   emit_operand(dst, src);
5549 }
5550 
5551 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5552   assert(UseAVX > 0, "requires some form of AVX");
5553   InstructionMark im(this);
5554   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5555   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5556   attributes.set_rex_vex_w_reverted();
5557   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5558   emit_int8((unsigned char)0xFB);
5559   emit_operand(dst, src);
5560 }
5561 
5562 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
5563   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5564   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5565   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5566   emit_int8((unsigned char)0xD5);
5567   emit_int8((unsigned char)(0xC0 | encode));
5568 }
5569 
5570 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
5571   assert(VM_Version::supports_sse4_1(), "");
5572   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5573   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5574   emit_int8(0x40);
5575   emit_int8((unsigned char)(0xC0 | encode));
5576 }
5577 
5578 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5579   assert(UseAVX > 0, "requires some form of AVX");
5580   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5581   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5582   emit_int8((unsigned char)0xD5);
5583   emit_int8((unsigned char)(0xC0 | encode));
5584 }
5585 
5586 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5587   assert(UseAVX > 0, "requires some form of AVX");
5588   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5589   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5590   emit_int8(0x40);
5591   emit_int8((unsigned char)(0xC0 | encode));
5592 }
5593 
5594 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5595   assert(UseAVX > 2, "requires some form of EVEX");
5596   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5597   attributes.set_is_evex_instruction();
5598   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5599   emit_int8(0x40);
5600   emit_int8((unsigned char)(0xC0 | encode));
5601 }
5602 
5603 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5604   assert(UseAVX > 0, "requires some form of AVX");
5605   InstructionMark im(this);
5606   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5607   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5608   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5609   emit_int8((unsigned char)0xD5);
5610   emit_operand(dst, src);
5611 }
5612 
5613 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5614   assert(UseAVX > 0, "requires some form of AVX");
5615   InstructionMark im(this);
5616   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5617   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5618   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5619   emit_int8(0x40);
5620   emit_operand(dst, src);
5621 }
5622 
5623 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5624   assert(UseAVX > 2, "requires some form of EVEX");
5625   InstructionMark im(this);
5626   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5627   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5628   attributes.set_is_evex_instruction();
5629   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5630   emit_int8(0x40);
5631   emit_operand(dst, src);
5632 }
5633 
5634 // Shift packed integers left by specified number of bits.
5635 void Assembler::psllw(XMMRegister dst, int shift) {
5636   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5637   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5638   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5639   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5640   emit_int8(0x71);
5641   emit_int8((unsigned char)(0xC0 | encode));
5642   emit_int8(shift & 0xFF);
5643 }
5644 
5645 void Assembler::pslld(XMMRegister dst, int shift) {
5646   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5647   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5648   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5649   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5650   emit_int8(0x72);
5651   emit_int8((unsigned char)(0xC0 | encode));
5652   emit_int8(shift & 0xFF);
5653 }
5654 
5655 void Assembler::psllq(XMMRegister dst, int shift) {
5656   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5657   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5658   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5659   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5660   emit_int8(0x73);
5661   emit_int8((unsigned char)(0xC0 | encode));
5662   emit_int8(shift & 0xFF);
5663 }
5664 
5665 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
5666   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5667   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5668   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5669   emit_int8((unsigned char)0xF1);
5670   emit_int8((unsigned char)(0xC0 | encode));
5671 }
5672 
5673 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
5674   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5675   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5676   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5677   emit_int8((unsigned char)0xF2);
5678   emit_int8((unsigned char)(0xC0 | encode));
5679 }
5680 
5681 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
5682   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5683   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5684   attributes.set_rex_vex_w_reverted();
5685   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5686   emit_int8((unsigned char)0xF3);
5687   emit_int8((unsigned char)(0xC0 | encode));
5688 }
5689 
5690 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5691   assert(UseAVX > 0, "requires some form of AVX");
5692   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5693   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5694   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5695   emit_int8(0x71);
5696   emit_int8((unsigned char)(0xC0 | encode));
5697   emit_int8(shift & 0xFF);
5698 }
5699 
5700 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5701   assert(UseAVX > 0, "requires some form of AVX");
5702   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5703   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5704   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5705   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5706   emit_int8(0x72);
5707   emit_int8((unsigned char)(0xC0 | encode));
5708   emit_int8(shift & 0xFF);
5709 }
5710 
5711 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5712   assert(UseAVX > 0, "requires some form of AVX");
5713   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5714   attributes.set_rex_vex_w_reverted();
5715   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5716   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5717   emit_int8(0x73);
5718   emit_int8((unsigned char)(0xC0 | encode));
5719   emit_int8(shift & 0xFF);
5720 }
5721 
5722 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5723   assert(UseAVX > 0, "requires some form of AVX");
5724   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5725   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5726   emit_int8((unsigned char)0xF1);
5727   emit_int8((unsigned char)(0xC0 | encode));
5728 }
5729 
5730 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5731   assert(UseAVX > 0, "requires some form of AVX");
5732   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5733   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5734   emit_int8((unsigned char)0xF2);
5735   emit_int8((unsigned char)(0xC0 | encode));
5736 }
5737 
5738 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5739   assert(UseAVX > 0, "requires some form of AVX");
5740   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5741   attributes.set_rex_vex_w_reverted();
5742   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5743   emit_int8((unsigned char)0xF3);
5744   emit_int8((unsigned char)(0xC0 | encode));
5745 }
5746 
5747 // Shift packed integers logically right by specified number of bits.
5748 void Assembler::psrlw(XMMRegister dst, int shift) {
5749   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5750   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5751   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
5752   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5753   emit_int8(0x71);
5754   emit_int8((unsigned char)(0xC0 | encode));
5755   emit_int8(shift & 0xFF);
5756 }
5757 
5758 void Assembler::psrld(XMMRegister dst, int shift) {
5759   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5760   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5761   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
5762   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5763   emit_int8(0x72);
5764   emit_int8((unsigned char)(0xC0 | encode));
5765   emit_int8(shift & 0xFF);
5766 }
5767 
5768 void Assembler::psrlq(XMMRegister dst, int shift) {
5769   // Do not confuse it with psrldq SSE2 instruction which
5770   // shifts 128 bit value in xmm register by number of bytes.
5771   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5772   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5773   attributes.set_rex_vex_w_reverted();
5774   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
5775   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5776   emit_int8(0x73);
5777   emit_int8((unsigned char)(0xC0 | encode));
5778   emit_int8(shift & 0xFF);
5779 }
5780 
5781 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
5782   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5783   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5784   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5785   emit_int8((unsigned char)0xD1);
5786   emit_int8((unsigned char)(0xC0 | encode));
5787 }
5788 
5789 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
5790   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5791   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5792   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5793   emit_int8((unsigned char)0xD2);
5794   emit_int8((unsigned char)(0xC0 | encode));
5795 }
5796 
5797 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
5798   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5799   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5800   attributes.set_rex_vex_w_reverted();
5801   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5802   emit_int8((unsigned char)0xD3);
5803   emit_int8((unsigned char)(0xC0 | encode));
5804 }
5805 
5806 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5807   assert(UseAVX > 0, "requires some form of AVX");
5808   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5809   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
5810   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5811   emit_int8(0x71);
5812   emit_int8((unsigned char)(0xC0 | encode));
5813   emit_int8(shift & 0xFF);
5814 }
5815 
5816 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5817   assert(UseAVX > 0, "requires some form of AVX");
5818   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5819   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
5820   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5821   emit_int8(0x72);
5822   emit_int8((unsigned char)(0xC0 | encode));
5823   emit_int8(shift & 0xFF);
5824 }
5825 
5826 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5827   assert(UseAVX > 0, "requires some form of AVX");
5828   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5829   attributes.set_rex_vex_w_reverted();
5830   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
5831   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5832   emit_int8(0x73);
5833   emit_int8((unsigned char)(0xC0 | encode));
5834   emit_int8(shift & 0xFF);
5835 }
5836 
5837 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5838   assert(UseAVX > 0, "requires some form of AVX");
5839   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5840   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5841   emit_int8((unsigned char)0xD1);
5842   emit_int8((unsigned char)(0xC0 | encode));
5843 }
5844 
5845 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5846   assert(UseAVX > 0, "requires some form of AVX");
5847   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5848   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5849   emit_int8((unsigned char)0xD2);
5850   emit_int8((unsigned char)(0xC0 | encode));
5851 }
5852 
5853 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5854   assert(UseAVX > 0, "requires some form of AVX");
5855   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5856   attributes.set_rex_vex_w_reverted();
5857   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5858   emit_int8((unsigned char)0xD3);
5859   emit_int8((unsigned char)(0xC0 | encode));
5860 }
5861 
5862 // Shift packed integers arithmetically right by specified number of bits.
5863 void Assembler::psraw(XMMRegister dst, int shift) {
5864   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5865   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5866   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
5867   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5868   emit_int8(0x71);
5869   emit_int8((unsigned char)(0xC0 | encode));
5870   emit_int8(shift & 0xFF);
5871 }
5872 
5873 void Assembler::psrad(XMMRegister dst, int shift) {
5874   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5875   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5876   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
5877   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5878   emit_int8(0x72);
5879   emit_int8((unsigned char)(0xC0 | encode));
5880   emit_int8(shift & 0xFF);
5881 }
5882 
5883 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
5884   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5885   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5886   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5887   emit_int8((unsigned char)0xE1);
5888   emit_int8((unsigned char)(0xC0 | encode));
5889 }
5890 
5891 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
5892   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5893   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5894   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5895   emit_int8((unsigned char)0xE2);
5896   emit_int8((unsigned char)(0xC0 | encode));
5897 }
5898 
5899 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5900   assert(UseAVX > 0, "requires some form of AVX");
5901   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5902   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
5903   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5904   emit_int8(0x71);
5905   emit_int8((unsigned char)(0xC0 | encode));
5906   emit_int8(shift & 0xFF);
5907 }
5908 
5909 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5910   assert(UseAVX > 0, "requires some form of AVX");
5911   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5912   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
5913   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5914   emit_int8(0x72);
5915   emit_int8((unsigned char)(0xC0 | encode));
5916   emit_int8(shift & 0xFF);
5917 }
5918 
5919 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5920   assert(UseAVX > 0, "requires some form of AVX");
5921   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5922   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5923   emit_int8((unsigned char)0xE1);
5924   emit_int8((unsigned char)(0xC0 | encode));
5925 }
5926 
5927 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5928   assert(UseAVX > 0, "requires some form of AVX");
5929   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5930   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5931   emit_int8((unsigned char)0xE2);
5932   emit_int8((unsigned char)(0xC0 | encode));
5933 }
5934 
5935 
5936 // logical operations packed integers
5937 void Assembler::pand(XMMRegister dst, XMMRegister src) {
5938   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5939   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5940   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5941   emit_int8((unsigned char)0xDB);
5942   emit_int8((unsigned char)(0xC0 | encode));
5943 }
5944 
5945 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5946   assert(UseAVX > 0, "requires some form of AVX");
5947   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5948   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5949   emit_int8((unsigned char)0xDB);
5950   emit_int8((unsigned char)(0xC0 | encode));
5951 }
5952 
5953 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5954   assert(UseAVX > 0, "requires some form of AVX");
5955   InstructionMark im(this);
5956   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5957   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5958   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5959   emit_int8((unsigned char)0xDB);
5960   emit_operand(dst, src);
5961 }
5962 
5963 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
5964   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5965   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5966   attributes.set_rex_vex_w_reverted();
5967   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5968   emit_int8((unsigned char)0xDF);
5969   emit_int8((unsigned char)(0xC0 | encode));
5970 }
5971 
5972 void Assembler::por(XMMRegister dst, XMMRegister src) {
5973   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5974   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5975   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5976   emit_int8((unsigned char)0xEB);
5977   emit_int8((unsigned char)(0xC0 | encode));
5978 }
5979 
5980 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5981   assert(UseAVX > 0, "requires some form of AVX");
5982   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5983   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5984   emit_int8((unsigned char)0xEB);
5985   emit_int8((unsigned char)(0xC0 | encode));
5986 }
5987 
5988 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5989   assert(UseAVX > 0, "requires some form of AVX");
5990   InstructionMark im(this);
5991   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5992   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5993   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5994   emit_int8((unsigned char)0xEB);
5995   emit_operand(dst, src);
5996 }
5997 
5998 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
5999   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6000   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6001   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6002   emit_int8((unsigned char)0xEF);
6003   emit_int8((unsigned char)(0xC0 | encode));
6004 }
6005 
6006 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6007   assert(UseAVX > 0, "requires some form of AVX");
6008   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6009   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6010   emit_int8((unsigned char)0xEF);
6011   emit_int8((unsigned char)(0xC0 | encode));
6012 }
6013 
6014 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6015   assert(UseAVX > 0, "requires some form of AVX");
6016   InstructionMark im(this);
6017   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6018   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6019   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6020   emit_int8((unsigned char)0xEF);
6021   emit_operand(dst, src);
6022 }
6023 
6024 
6025 // vinserti forms
6026 
6027 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6028   assert(VM_Version::supports_avx2(), "");
6029   assert(imm8 <= 0x01, "imm8: %u", imm8);
6030   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6031   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6032   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6033   emit_int8(0x38);
6034   emit_int8((unsigned char)(0xC0 | encode));
6035   // 0x00 - insert into lower 128 bits
6036   // 0x01 - insert into upper 128 bits
6037   emit_int8(imm8 & 0x01);
6038 }
6039 
6040 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6041   assert(VM_Version::supports_avx2(), "");
6042   assert(dst != xnoreg, "sanity");
6043   assert(imm8 <= 0x01, "imm8: %u", imm8);
6044   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6045   InstructionMark im(this);
6046   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6047   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6048   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6049   emit_int8(0x38);
6050   emit_operand(dst, src);
6051   // 0x00 - insert into lower 128 bits
6052   // 0x01 - insert into upper 128 bits
6053   emit_int8(imm8 & 0x01);
6054 }
6055 
6056 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6057   assert(VM_Version::supports_evex(), "");
6058   assert(imm8 <= 0x03, "imm8: %u", imm8);
6059   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6060   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6061   emit_int8(0x38);
6062   emit_int8((unsigned char)(0xC0 | encode));
6063   // 0x00 - insert into q0 128 bits (0..127)
6064   // 0x01 - insert into q1 128 bits (128..255)
6065   // 0x02 - insert into q2 128 bits (256..383)
6066   // 0x03 - insert into q3 128 bits (384..511)
6067   emit_int8(imm8 & 0x03);
6068 }
6069 
6070 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6071   assert(VM_Version::supports_avx(), "");
6072   assert(dst != xnoreg, "sanity");
6073   assert(imm8 <= 0x03, "imm8: %u", imm8);
6074   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6075   InstructionMark im(this);
6076   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6077   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6078   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6079   emit_int8(0x18);
6080   emit_operand(dst, src);
6081   // 0x00 - insert into q0 128 bits (0..127)
6082   // 0x01 - insert into q1 128 bits (128..255)
6083   // 0x02 - insert into q2 128 bits (256..383)
6084   // 0x03 - insert into q3 128 bits (384..511)
6085   emit_int8(imm8 & 0x03);
6086 }
6087 
6088 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6089   assert(VM_Version::supports_evex(), "");
6090   assert(imm8 <= 0x01, "imm8: %u", imm8);
6091   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6092   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6093   emit_int8(0x38);
6094   emit_int8((unsigned char)(0xC0 | encode));
6095   // 0x00 - insert into lower 256 bits
6096   // 0x01 - insert into upper 256 bits
6097   emit_int8(imm8 & 0x01);
6098 }
6099 
6100 
6101 // vinsertf forms
6102 
6103 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6104   assert(VM_Version::supports_avx(), "");
6105   assert(imm8 <= 0x01, "imm8: %u", imm8);
6106   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6107   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6108   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6109   emit_int8(0x18);
6110   emit_int8((unsigned char)(0xC0 | encode));
6111   // 0x00 - insert into lower 128 bits
6112   // 0x01 - insert into upper 128 bits
6113   emit_int8(imm8 & 0x01);
6114 }
6115 
6116 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6117   assert(VM_Version::supports_avx(), "");
6118   assert(dst != xnoreg, "sanity");
6119   assert(imm8 <= 0x01, "imm8: %u", imm8);
6120   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6121   InstructionMark im(this);
6122   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6123   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6124   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6125   emit_int8(0x18);
6126   emit_operand(dst, src);
6127   // 0x00 - insert into lower 128 bits
6128   // 0x01 - insert into upper 128 bits
6129   emit_int8(imm8 & 0x01);
6130 }
6131 
6132 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6133   assert(VM_Version::supports_evex(), "");
6134   assert(imm8 <= 0x03, "imm8: %u", imm8);
6135   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6136   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6137   emit_int8(0x18);
6138   emit_int8((unsigned char)(0xC0 | encode));
6139   // 0x00 - insert into q0 128 bits (0..127)
6140   // 0x01 - insert into q1 128 bits (128..255)
6141   // 0x02 - insert into q2 128 bits (256..383)
6142   // 0x03 - insert into q3 128 bits (384..511)
6143   emit_int8(imm8 & 0x03);
6144 }
6145 
6146 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6147   assert(VM_Version::supports_avx(), "");
6148   assert(dst != xnoreg, "sanity");
6149   assert(imm8 <= 0x03, "imm8: %u", imm8);
6150   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6151   InstructionMark im(this);
6152   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6153   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6154   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6155   emit_int8(0x18);
6156   emit_operand(dst, src);
6157   // 0x00 - insert into q0 128 bits (0..127)
6158   // 0x01 - insert into q1 128 bits (128..255)
6159   // 0x02 - insert into q2 128 bits (256..383)
6160   // 0x03 - insert into q3 128 bits (384..511)
6161   emit_int8(imm8 & 0x03);
6162 }
6163 
6164 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6165   assert(VM_Version::supports_evex(), "");
6166   assert(imm8 <= 0x01, "imm8: %u", imm8);
6167   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6168   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6169   emit_int8(0x1A);
6170   emit_int8((unsigned char)(0xC0 | encode));
6171   // 0x00 - insert into lower 256 bits
6172   // 0x01 - insert into upper 256 bits
6173   emit_int8(imm8 & 0x01);
6174 }
6175 
6176 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6177   assert(VM_Version::supports_evex(), "");
6178   assert(dst != xnoreg, "sanity");
6179   assert(imm8 <= 0x01, "imm8: %u", imm8);
6180   InstructionMark im(this);
6181   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6182   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
6183   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6184   emit_int8(0x1A);
6185   emit_operand(dst, src);
6186   // 0x00 - insert into lower 256 bits
6187   // 0x01 - insert into upper 256 bits
6188   emit_int8(imm8 & 0x01);
6189 }
6190 
6191 
6192 // vextracti forms
6193 
6194 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6195   assert(VM_Version::supports_avx(), "");
6196   assert(imm8 <= 0x01, "imm8: %u", imm8);
6197   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6198   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6199   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6200   emit_int8(0x39);
6201   emit_int8((unsigned char)(0xC0 | encode));
6202   // 0x00 - extract from lower 128 bits
6203   // 0x01 - extract from upper 128 bits
6204   emit_int8(imm8 & 0x01);
6205 }
6206 
6207 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
6208   assert(VM_Version::supports_avx2(), "");
6209   assert(src != xnoreg, "sanity");
6210   assert(imm8 <= 0x01, "imm8: %u", imm8);
6211   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6212   InstructionMark im(this);
6213   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6214   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6215   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6216   emit_int8(0x39);
6217   emit_operand(src, dst);
6218   // 0x00 - extract from lower 128 bits
6219   // 0x01 - extract from upper 128 bits
6220   emit_int8(imm8 & 0x01);
6221 }
6222 
6223 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6224   assert(VM_Version::supports_avx(), "");
6225   assert(imm8 <= 0x03, "imm8: %u", imm8);
6226   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6227   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6228   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6229   emit_int8(0x39);
6230   emit_int8((unsigned char)(0xC0 | encode));
6231   // 0x00 - extract from bits 127:0
6232   // 0x01 - extract from bits 255:128
6233   // 0x02 - extract from bits 383:256
6234   // 0x03 - extract from bits 511:384
6235   emit_int8(imm8 & 0x03);
6236 }
6237 
6238 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
6239   assert(VM_Version::supports_evex(), "");
6240   assert(src != xnoreg, "sanity");
6241   assert(imm8 <= 0x03, "imm8: %u", imm8);
6242   InstructionMark im(this);
6243   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6244   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6245   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6246   emit_int8(0x39);
6247   emit_operand(src, dst);
6248   // 0x00 - extract from bits 127:0
6249   // 0x01 - extract from bits 255:128
6250   // 0x02 - extract from bits 383:256
6251   // 0x03 - extract from bits 511:384
6252   emit_int8(imm8 & 0x03);
6253 }
6254 
6255 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6256   assert(VM_Version::supports_avx512dq(), "");
6257   assert(imm8 <= 0x03, "imm8: %u", imm8);
6258   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6259   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6260   emit_int8(0x39);
6261   emit_int8((unsigned char)(0xC0 | encode));
6262   // 0x00 - extract from bits 127:0
6263   // 0x01 - extract from bits 255:128
6264   // 0x02 - extract from bits 383:256
6265   // 0x03 - extract from bits 511:384
6266   emit_int8(imm8 & 0x03);
6267 }
6268 
6269 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6270   assert(VM_Version::supports_evex(), "");
6271   assert(imm8 <= 0x01, "imm8: %u", imm8);
6272   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6273   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6274   emit_int8(0x3B);
6275   emit_int8((unsigned char)(0xC0 | encode));
6276   // 0x00 - extract from lower 256 bits
6277   // 0x01 - extract from upper 256 bits
6278   emit_int8(imm8 & 0x01);
6279 }
6280 
6281 
6282 // vextractf forms
6283 
6284 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6285   assert(VM_Version::supports_avx(), "");
6286   assert(imm8 <= 0x01, "imm8: %u", imm8);
6287   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6288   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6289   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6290   emit_int8(0x19);
6291   emit_int8((unsigned char)(0xC0 | encode));
6292   // 0x00 - extract from lower 128 bits
6293   // 0x01 - extract from upper 128 bits
6294   emit_int8(imm8 & 0x01);
6295 }
6296 
6297 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
6298   assert(VM_Version::supports_avx(), "");
6299   assert(src != xnoreg, "sanity");
6300   assert(imm8 <= 0x01, "imm8: %u", imm8);
6301   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6302   InstructionMark im(this);
6303   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6304   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6305   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6306   emit_int8(0x19);
6307   emit_operand(src, dst);
6308   // 0x00 - extract from lower 128 bits
6309   // 0x01 - extract from upper 128 bits
6310   emit_int8(imm8 & 0x01);
6311 }
6312 
6313 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6314   assert(VM_Version::supports_avx(), "");
6315   assert(imm8 <= 0x03, "imm8: %u", imm8);
6316   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6317   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6318   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6319   emit_int8(0x19);
6320   emit_int8((unsigned char)(0xC0 | encode));
6321   // 0x00 - extract from bits 127:0
6322   // 0x01 - extract from bits 255:128
6323   // 0x02 - extract from bits 383:256
6324   // 0x03 - extract from bits 511:384
6325   emit_int8(imm8 & 0x03);
6326 }
6327 
6328 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
6329   assert(VM_Version::supports_evex(), "");
6330   assert(src != xnoreg, "sanity");
6331   assert(imm8 <= 0x03, "imm8: %u", imm8);
6332   InstructionMark im(this);
6333   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6334   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6335   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6336   emit_int8(0x19);
6337   emit_operand(src, dst);
6338   // 0x00 - extract from bits 127:0
6339   // 0x01 - extract from bits 255:128
6340   // 0x02 - extract from bits 383:256
6341   // 0x03 - extract from bits 511:384
6342   emit_int8(imm8 & 0x03);
6343 }
6344 
6345 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6346   assert(VM_Version::supports_avx512dq(), "");
6347   assert(imm8 <= 0x03, "imm8: %u", imm8);
6348   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6349   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6350   emit_int8(0x19);
6351   emit_int8((unsigned char)(0xC0 | encode));
6352   // 0x00 - extract from bits 127:0
6353   // 0x01 - extract from bits 255:128
6354   // 0x02 - extract from bits 383:256
6355   // 0x03 - extract from bits 511:384
6356   emit_int8(imm8 & 0x03);
6357 }
6358 
6359 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6360   assert(VM_Version::supports_evex(), "");
6361   assert(imm8 <= 0x01, "imm8: %u", imm8);
6362   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6363   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6364   emit_int8(0x1B);
6365   emit_int8((unsigned char)(0xC0 | encode));
6366   // 0x00 - extract from lower 256 bits
6367   // 0x01 - extract from upper 256 bits
6368   emit_int8(imm8 & 0x01);
6369 }
6370 
6371 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
6372   assert(VM_Version::supports_evex(), "");
6373   assert(src != xnoreg, "sanity");
6374   assert(imm8 <= 0x01, "imm8: %u", imm8);
6375   InstructionMark im(this);
6376   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6377   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
6378   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6379   emit_int8(0x1B);
6380   emit_operand(src, dst);
6381   // 0x00 - extract from lower 256 bits
6382   // 0x01 - extract from upper 256 bits
6383   emit_int8(imm8 & 0x01);
6384 }
6385 
6386 
6387 // legacy word/dword replicate
6388 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
6389   assert(VM_Version::supports_avx2(), "");
6390   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6391   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6392   emit_int8(0x79);
6393   emit_int8((unsigned char)(0xC0 | encode));
6394 }
6395 
6396 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
6397   assert(VM_Version::supports_avx2(), "");
6398   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6399   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6400   emit_int8(0x58);
6401   emit_int8((unsigned char)(0xC0 | encode));
6402 }
6403 
6404 
6405 // xmm/mem sourced byte/word/dword/qword replicate
6406 
6407 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6408 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
6409   assert(VM_Version::supports_evex(), "");
6410   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6411   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6412   emit_int8(0x78);
6413   emit_int8((unsigned char)(0xC0 | encode));
6414 }
6415 
6416 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
6417   assert(VM_Version::supports_evex(), "");
6418   assert(dst != xnoreg, "sanity");
6419   InstructionMark im(this);
6420   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6421   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
6422   // swap src<->dst for encoding
6423   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6424   emit_int8(0x78);
6425   emit_operand(dst, src);
6426 }
6427 
6428 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6429 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
6430   assert(VM_Version::supports_evex(), "");
6431   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6432   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6433   emit_int8(0x79);
6434   emit_int8((unsigned char)(0xC0 | encode));
6435 }
6436 
6437 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
6438   assert(VM_Version::supports_evex(), "");
6439   assert(dst != xnoreg, "sanity");
6440   InstructionMark im(this);
6441   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6442   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
6443   // swap src<->dst for encoding
6444   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6445   emit_int8(0x79);
6446   emit_operand(dst, src);
6447 }
6448 
6449 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6450 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
6451   assert(VM_Version::supports_evex(), "");
6452   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6453   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6454   emit_int8(0x58);
6455   emit_int8((unsigned char)(0xC0 | encode));
6456 }
6457 
6458 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
6459   assert(VM_Version::supports_evex(), "");
6460   assert(dst != xnoreg, "sanity");
6461   InstructionMark im(this);
6462   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6463   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6464   // swap src<->dst for encoding
6465   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6466   emit_int8(0x58);
6467   emit_operand(dst, src);
6468 }
6469 
6470 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6471 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
6472   assert(VM_Version::supports_evex(), "");
6473   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6474   attributes.set_rex_vex_w_reverted();
6475   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6476   emit_int8(0x59);
6477   emit_int8((unsigned char)(0xC0 | encode));
6478 }
6479 
6480 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
6481   assert(VM_Version::supports_evex(), "");
6482   assert(dst != xnoreg, "sanity");
6483   InstructionMark im(this);
6484   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6485   attributes.set_rex_vex_w_reverted();
6486   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6487   // swap src<->dst for encoding
6488   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6489   emit_int8(0x59);
6490   emit_operand(dst, src);
6491 }
6492 
6493 
6494 // scalar single/double precision replicate
6495 
6496 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
6497 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
6498   assert(VM_Version::supports_evex(), "");
6499   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6500   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6501   emit_int8(0x18);
6502   emit_int8((unsigned char)(0xC0 | encode));
6503 }
6504 
6505 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
6506   assert(VM_Version::supports_evex(), "");
6507   assert(dst != xnoreg, "sanity");
6508   InstructionMark im(this);
6509   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6510   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6511   // swap src<->dst for encoding
6512   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6513   emit_int8(0x18);
6514   emit_operand(dst, src);
6515 }
6516 
6517 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
6518 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
6519   assert(VM_Version::supports_evex(), "");
6520   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6521   attributes.set_rex_vex_w_reverted();
6522   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6523   emit_int8(0x19);
6524   emit_int8((unsigned char)(0xC0 | encode));
6525 }
6526 
6527 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
6528   assert(VM_Version::supports_evex(), "");
6529   assert(dst != xnoreg, "sanity");
6530   InstructionMark im(this);
6531   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6532   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6533   attributes.set_rex_vex_w_reverted();
6534   // swap src<->dst for encoding
6535   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6536   emit_int8(0x19);
6537   emit_operand(dst, src);
6538 }
6539 
6540 
6541 // gpr source broadcast forms
6542 
6543 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6544 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
6545   assert(VM_Version::supports_evex(), "");
6546   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6547   attributes.set_is_evex_instruction();
6548   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6549   emit_int8(0x7A);
6550   emit_int8((unsigned char)(0xC0 | encode));
6551 }
6552 
6553 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6554 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
6555   assert(VM_Version::supports_evex(), "");
6556   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6557   attributes.set_is_evex_instruction();
6558   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6559   emit_int8(0x7B);
6560   emit_int8((unsigned char)(0xC0 | encode));
6561 }
6562 
6563 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6564 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
6565   assert(VM_Version::supports_evex(), "");
6566   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6567   attributes.set_is_evex_instruction();
6568   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6569   emit_int8(0x7C);
6570   emit_int8((unsigned char)(0xC0 | encode));
6571 }
6572 
6573 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6574 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
6575   assert(VM_Version::supports_evex(), "");
6576   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6577   attributes.set_is_evex_instruction();
6578   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6579   emit_int8(0x7C);
6580   emit_int8((unsigned char)(0xC0 | encode));
6581 }
6582 
6583 
6584 // Carry-Less Multiplication Quadword
6585 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
6586   assert(VM_Version::supports_clmul(), "");
6587   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6588   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6589   emit_int8(0x44);
6590   emit_int8((unsigned char)(0xC0 | encode));
6591   emit_int8((unsigned char)mask);
6592 }
6593 
6594 // Carry-Less Multiplication Quadword
6595 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
6596   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
6597   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6598   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6599   emit_int8(0x44);
6600   emit_int8((unsigned char)(0xC0 | encode));
6601   emit_int8((unsigned char)mask);
6602 }
6603 
6604 void Assembler::vzeroupper() {
6605   assert(VM_Version::supports_avx(), "");
6606   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6607   (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6608   emit_int8(0x77);
6609 }
6610 
6611 #ifndef _LP64
6612 // 32bit only pieces of the assembler
6613 
6614 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
6615   // NO PREFIX AS NEVER 64BIT
6616   InstructionMark im(this);
6617   emit_int8((unsigned char)0x81);
6618   emit_int8((unsigned char)(0xF8 | src1->encoding()));
6619   emit_data(imm32, rspec, 0);
6620 }
6621 
6622 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
6623   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
6624   InstructionMark im(this);
6625   emit_int8((unsigned char)0x81);
6626   emit_operand(rdi, src1);
6627   emit_data(imm32, rspec, 0);
6628 }
6629 
6630 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
6631 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
6632 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
6633 void Assembler::cmpxchg8(Address adr) {
6634   InstructionMark im(this);
6635   emit_int8(0x0F);
6636   emit_int8((unsigned char)0xC7);
6637   emit_operand(rcx, adr);
6638 }
6639 
6640 void Assembler::decl(Register dst) {
6641   // Don't use it directly. Use MacroAssembler::decrementl() instead.
6642  emit_int8(0x48 | dst->encoding());
6643 }
6644 
6645 #endif // _LP64
6646 
6647 // 64bit typically doesn't use the x87 but needs to for the trig funcs
6648 
6649 void Assembler::fabs() {
6650   emit_int8((unsigned char)0xD9);
6651   emit_int8((unsigned char)0xE1);
6652 }
6653 
6654 void Assembler::fadd(int i) {
6655   emit_farith(0xD8, 0xC0, i);
6656 }
6657 
6658 void Assembler::fadd_d(Address src) {
6659   InstructionMark im(this);
6660   emit_int8((unsigned char)0xDC);
6661   emit_operand32(rax, src);
6662 }
6663 
6664 void Assembler::fadd_s(Address src) {
6665   InstructionMark im(this);
6666   emit_int8((unsigned char)0xD8);
6667   emit_operand32(rax, src);
6668 }
6669 
6670 void Assembler::fadda(int i) {
6671   emit_farith(0xDC, 0xC0, i);
6672 }
6673 
6674 void Assembler::faddp(int i) {
6675   emit_farith(0xDE, 0xC0, i);
6676 }
6677 
6678 void Assembler::fchs() {
6679   emit_int8((unsigned char)0xD9);
6680   emit_int8((unsigned char)0xE0);
6681 }
6682 
6683 void Assembler::fcom(int i) {
6684   emit_farith(0xD8, 0xD0, i);
6685 }
6686 
6687 void Assembler::fcomp(int i) {
6688   emit_farith(0xD8, 0xD8, i);
6689 }
6690 
6691 void Assembler::fcomp_d(Address src) {
6692   InstructionMark im(this);
6693   emit_int8((unsigned char)0xDC);
6694   emit_operand32(rbx, src);
6695 }
6696 
6697 void Assembler::fcomp_s(Address src) {
6698   InstructionMark im(this);
6699   emit_int8((unsigned char)0xD8);
6700   emit_operand32(rbx, src);
6701 }
6702 
6703 void Assembler::fcompp() {
6704   emit_int8((unsigned char)0xDE);
6705   emit_int8((unsigned char)0xD9);
6706 }
6707 
6708 void Assembler::fcos() {
6709   emit_int8((unsigned char)0xD9);
6710   emit_int8((unsigned char)0xFF);
6711 }
6712 
6713 void Assembler::fdecstp() {
6714   emit_int8((unsigned char)0xD9);
6715   emit_int8((unsigned char)0xF6);
6716 }
6717 
6718 void Assembler::fdiv(int i) {
6719   emit_farith(0xD8, 0xF0, i);
6720 }
6721 
6722 void Assembler::fdiv_d(Address src) {
6723   InstructionMark im(this);
6724   emit_int8((unsigned char)0xDC);
6725   emit_operand32(rsi, src);
6726 }
6727 
6728 void Assembler::fdiv_s(Address src) {
6729   InstructionMark im(this);
6730   emit_int8((unsigned char)0xD8);
6731   emit_operand32(rsi, src);
6732 }
6733 
6734 void Assembler::fdiva(int i) {
6735   emit_farith(0xDC, 0xF8, i);
6736 }
6737 
6738 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
6739 //       is erroneous for some of the floating-point instructions below.
6740 
6741 void Assembler::fdivp(int i) {
6742   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
6743 }
6744 
6745 void Assembler::fdivr(int i) {
6746   emit_farith(0xD8, 0xF8, i);
6747 }
6748 
6749 void Assembler::fdivr_d(Address src) {
6750   InstructionMark im(this);
6751   emit_int8((unsigned char)0xDC);
6752   emit_operand32(rdi, src);
6753 }
6754 
6755 void Assembler::fdivr_s(Address src) {
6756   InstructionMark im(this);
6757   emit_int8((unsigned char)0xD8);
6758   emit_operand32(rdi, src);
6759 }
6760 
6761 void Assembler::fdivra(int i) {
6762   emit_farith(0xDC, 0xF0, i);
6763 }
6764 
6765 void Assembler::fdivrp(int i) {
6766   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
6767 }
6768 
6769 void Assembler::ffree(int i) {
6770   emit_farith(0xDD, 0xC0, i);
6771 }
6772 
6773 void Assembler::fild_d(Address adr) {
6774   InstructionMark im(this);
6775   emit_int8((unsigned char)0xDF);
6776   emit_operand32(rbp, adr);
6777 }
6778 
6779 void Assembler::fild_s(Address adr) {
6780   InstructionMark im(this);
6781   emit_int8((unsigned char)0xDB);
6782   emit_operand32(rax, adr);
6783 }
6784 
6785 void Assembler::fincstp() {
6786   emit_int8((unsigned char)0xD9);
6787   emit_int8((unsigned char)0xF7);
6788 }
6789 
6790 void Assembler::finit() {
6791   emit_int8((unsigned char)0x9B);
6792   emit_int8((unsigned char)0xDB);
6793   emit_int8((unsigned char)0xE3);
6794 }
6795 
6796 void Assembler::fist_s(Address adr) {
6797   InstructionMark im(this);
6798   emit_int8((unsigned char)0xDB);
6799   emit_operand32(rdx, adr);
6800 }
6801 
6802 void Assembler::fistp_d(Address adr) {
6803   InstructionMark im(this);
6804   emit_int8((unsigned char)0xDF);
6805   emit_operand32(rdi, adr);
6806 }
6807 
6808 void Assembler::fistp_s(Address adr) {
6809   InstructionMark im(this);
6810   emit_int8((unsigned char)0xDB);
6811   emit_operand32(rbx, adr);
6812 }
6813 
6814 void Assembler::fld1() {
6815   emit_int8((unsigned char)0xD9);
6816   emit_int8((unsigned char)0xE8);
6817 }
6818 
6819 void Assembler::fld_d(Address adr) {
6820   InstructionMark im(this);
6821   emit_int8((unsigned char)0xDD);
6822   emit_operand32(rax, adr);
6823 }
6824 
6825 void Assembler::fld_s(Address adr) {
6826   InstructionMark im(this);
6827   emit_int8((unsigned char)0xD9);
6828   emit_operand32(rax, adr);
6829 }
6830 
6831 
6832 void Assembler::fld_s(int index) {
6833   emit_farith(0xD9, 0xC0, index);
6834 }
6835 
6836 void Assembler::fld_x(Address adr) {
6837   InstructionMark im(this);
6838   emit_int8((unsigned char)0xDB);
6839   emit_operand32(rbp, adr);
6840 }
6841 
6842 void Assembler::fldcw(Address src) {
6843   InstructionMark im(this);
6844   emit_int8((unsigned char)0xD9);
6845   emit_operand32(rbp, src);
6846 }
6847 
6848 void Assembler::fldenv(Address src) {
6849   InstructionMark im(this);
6850   emit_int8((unsigned char)0xD9);
6851   emit_operand32(rsp, src);
6852 }
6853 
6854 void Assembler::fldlg2() {
6855   emit_int8((unsigned char)0xD9);
6856   emit_int8((unsigned char)0xEC);
6857 }
6858 
6859 void Assembler::fldln2() {
6860   emit_int8((unsigned char)0xD9);
6861   emit_int8((unsigned char)0xED);
6862 }
6863 
6864 void Assembler::fldz() {
6865   emit_int8((unsigned char)0xD9);
6866   emit_int8((unsigned char)0xEE);
6867 }
6868 
6869 void Assembler::flog() {
6870   fldln2();
6871   fxch();
6872   fyl2x();
6873 }
6874 
6875 void Assembler::flog10() {
6876   fldlg2();
6877   fxch();
6878   fyl2x();
6879 }
6880 
6881 void Assembler::fmul(int i) {
6882   emit_farith(0xD8, 0xC8, i);
6883 }
6884 
6885 void Assembler::fmul_d(Address src) {
6886   InstructionMark im(this);
6887   emit_int8((unsigned char)0xDC);
6888   emit_operand32(rcx, src);
6889 }
6890 
6891 void Assembler::fmul_s(Address src) {
6892   InstructionMark im(this);
6893   emit_int8((unsigned char)0xD8);
6894   emit_operand32(rcx, src);
6895 }
6896 
6897 void Assembler::fmula(int i) {
6898   emit_farith(0xDC, 0xC8, i);
6899 }
6900 
6901 void Assembler::fmulp(int i) {
6902   emit_farith(0xDE, 0xC8, i);
6903 }
6904 
6905 void Assembler::fnsave(Address dst) {
6906   InstructionMark im(this);
6907   emit_int8((unsigned char)0xDD);
6908   emit_operand32(rsi, dst);
6909 }
6910 
6911 void Assembler::fnstcw(Address src) {
6912   InstructionMark im(this);
6913   emit_int8((unsigned char)0x9B);
6914   emit_int8((unsigned char)0xD9);
6915   emit_operand32(rdi, src);
6916 }
6917 
6918 void Assembler::fnstsw_ax() {
6919   emit_int8((unsigned char)0xDF);
6920   emit_int8((unsigned char)0xE0);
6921 }
6922 
6923 void Assembler::fprem() {
6924   emit_int8((unsigned char)0xD9);
6925   emit_int8((unsigned char)0xF8);
6926 }
6927 
6928 void Assembler::fprem1() {
6929   emit_int8((unsigned char)0xD9);
6930   emit_int8((unsigned char)0xF5);
6931 }
6932 
6933 void Assembler::frstor(Address src) {
6934   InstructionMark im(this);
6935   emit_int8((unsigned char)0xDD);
6936   emit_operand32(rsp, src);
6937 }
6938 
6939 void Assembler::fsin() {
6940   emit_int8((unsigned char)0xD9);
6941   emit_int8((unsigned char)0xFE);
6942 }
6943 
6944 void Assembler::fsqrt() {
6945   emit_int8((unsigned char)0xD9);
6946   emit_int8((unsigned char)0xFA);
6947 }
6948 
6949 void Assembler::fst_d(Address adr) {
6950   InstructionMark im(this);
6951   emit_int8((unsigned char)0xDD);
6952   emit_operand32(rdx, adr);
6953 }
6954 
6955 void Assembler::fst_s(Address adr) {
6956   InstructionMark im(this);
6957   emit_int8((unsigned char)0xD9);
6958   emit_operand32(rdx, adr);
6959 }
6960 
6961 void Assembler::fstp_d(Address adr) {
6962   InstructionMark im(this);
6963   emit_int8((unsigned char)0xDD);
6964   emit_operand32(rbx, adr);
6965 }
6966 
6967 void Assembler::fstp_d(int index) {
6968   emit_farith(0xDD, 0xD8, index);
6969 }
6970 
6971 void Assembler::fstp_s(Address adr) {
6972   InstructionMark im(this);
6973   emit_int8((unsigned char)0xD9);
6974   emit_operand32(rbx, adr);
6975 }
6976 
6977 void Assembler::fstp_x(Address adr) {
6978   InstructionMark im(this);
6979   emit_int8((unsigned char)0xDB);
6980   emit_operand32(rdi, adr);
6981 }
6982 
6983 void Assembler::fsub(int i) {
6984   emit_farith(0xD8, 0xE0, i);
6985 }
6986 
6987 void Assembler::fsub_d(Address src) {
6988   InstructionMark im(this);
6989   emit_int8((unsigned char)0xDC);
6990   emit_operand32(rsp, src);
6991 }
6992 
6993 void Assembler::fsub_s(Address src) {
6994   InstructionMark im(this);
6995   emit_int8((unsigned char)0xD8);
6996   emit_operand32(rsp, src);
6997 }
6998 
6999 void Assembler::fsuba(int i) {
7000   emit_farith(0xDC, 0xE8, i);
7001 }
7002 
7003 void Assembler::fsubp(int i) {
7004   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
7005 }
7006 
7007 void Assembler::fsubr(int i) {
7008   emit_farith(0xD8, 0xE8, i);
7009 }
7010 
7011 void Assembler::fsubr_d(Address src) {
7012   InstructionMark im(this);
7013   emit_int8((unsigned char)0xDC);
7014   emit_operand32(rbp, src);
7015 }
7016 
7017 void Assembler::fsubr_s(Address src) {
7018   InstructionMark im(this);
7019   emit_int8((unsigned char)0xD8);
7020   emit_operand32(rbp, src);
7021 }
7022 
7023 void Assembler::fsubra(int i) {
7024   emit_farith(0xDC, 0xE0, i);
7025 }
7026 
7027 void Assembler::fsubrp(int i) {
7028   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
7029 }
7030 
7031 void Assembler::ftan() {
7032   emit_int8((unsigned char)0xD9);
7033   emit_int8((unsigned char)0xF2);
7034   emit_int8((unsigned char)0xDD);
7035   emit_int8((unsigned char)0xD8);
7036 }
7037 
7038 void Assembler::ftst() {
7039   emit_int8((unsigned char)0xD9);
7040   emit_int8((unsigned char)0xE4);
7041 }
7042 
7043 void Assembler::fucomi(int i) {
7044   // make sure the instruction is supported (introduced for P6, together with cmov)
7045   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7046   emit_farith(0xDB, 0xE8, i);
7047 }
7048 
7049 void Assembler::fucomip(int i) {
7050   // make sure the instruction is supported (introduced for P6, together with cmov)
7051   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7052   emit_farith(0xDF, 0xE8, i);
7053 }
7054 
7055 void Assembler::fwait() {
7056   emit_int8((unsigned char)0x9B);
7057 }
7058 
7059 void Assembler::fxch(int i) {
7060   emit_farith(0xD9, 0xC8, i);
7061 }
7062 
7063 void Assembler::fyl2x() {
7064   emit_int8((unsigned char)0xD9);
7065   emit_int8((unsigned char)0xF1);
7066 }
7067 
7068 void Assembler::frndint() {
7069   emit_int8((unsigned char)0xD9);
7070   emit_int8((unsigned char)0xFC);
7071 }
7072 
7073 void Assembler::f2xm1() {
7074   emit_int8((unsigned char)0xD9);
7075   emit_int8((unsigned char)0xF0);
7076 }
7077 
7078 void Assembler::fldl2e() {
7079   emit_int8((unsigned char)0xD9);
7080   emit_int8((unsigned char)0xEA);
7081 }
7082 
// Legacy SSE SIMD prefix bytes, one slot per VexSimdPrefix encoding value.
// Slot 0 holds 0, meaning "no prefix byte".
static int simd_pre[4] = {
  0x00,
  0x66,
  0xF3,
  0xF2
};

// Second opcode byte of the legacy SSE encoding (the first is always 0x0F),
// one slot per VexOpcode encoding value. A 0 entry means there is no
// second byte.
static int simd_opc[4] = {
  0x00,
  0x00,
  0x38,
  0x3A
};
7087 
7088 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
7089 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7090   if (pre > 0) {
7091     emit_int8(simd_pre[pre]);
7092   }
7093   if (rex_w) {
7094     prefixq(adr, xreg);
7095   } else {
7096     prefix(adr, xreg);
7097   }
7098   if (opc > 0) {
7099     emit_int8(0x0F);
7100     int opc2 = simd_opc[opc];
7101     if (opc2 > 0) {
7102       emit_int8(opc2);
7103     }
7104   }
7105 }
7106 
7107 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7108   if (pre > 0) {
7109     emit_int8(simd_pre[pre]);
7110   }
7111   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
7112   if (opc > 0) {
7113     emit_int8(0x0F);
7114     int opc2 = simd_opc[opc];
7115     if (opc2 > 0) {
7116       emit_int8(opc2);
7117     }
7118   }
7119   return encode;
7120 }
7121 
7122 
7123 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
7124   int vector_len = _attributes->get_vector_len();
7125   bool vex_w = _attributes->is_rex_vex_w();
7126   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
7127     prefix(VEX_3bytes);
7128 
7129     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
7130     byte1 = (~byte1) & 0xE0;
7131     byte1 |= opc;
7132     emit_int8(byte1);
7133 
7134     int byte2 = ((~nds_enc) & 0xf) << 3;
7135     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
7136     emit_int8(byte2);
7137   } else {
7138     prefix(VEX_2bytes);
7139 
7140     int byte1 = vex_r ? VEX_R : 0;
7141     byte1 = (~byte1) & 0x80;
7142     byte1 |= ((~nds_enc) & 0xf) << 3;
7143     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
7144     emit_int8(byte1);
7145   }
7146 }
7147 
7148 // This is a 4 byte encoding
7149 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
7150   // EVEX 0x62 prefix
7151   prefix(EVEX_4bytes);
7152   bool vex_w = _attributes->is_rex_vex_w();
7153   int evex_encoding = (vex_w ? VEX_W : 0);
7154   // EVEX.b is not currently used for broadcast of single element or data rounding modes
7155   _attributes->set_evex_encoding(evex_encoding);
7156 
7157   // P0: byte 2, initialized to RXBR`00mm
7158   // instead of not'd
7159   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
7160   byte2 = (~byte2) & 0xF0;
7161   // confine opc opcode extensions in mm bits to lower two bits
7162   // of form {0F, 0F_38, 0F_3A}
7163   byte2 |= opc;
7164   emit_int8(byte2);
7165 
7166   // P1: byte 3 as Wvvvv1pp
7167   int byte3 = ((~nds_enc) & 0xf) << 3;
7168   // p[10] is always 1
7169   byte3 |= EVEX_F;
7170   byte3 |= (vex_w & 1) << 7;
7171   // confine pre opcode extensions in pp bits to lower two bits
7172   // of form {66, F3, F2}
7173   byte3 |= pre;
7174   emit_int8(byte3);
7175 
7176   // P2: byte 4 as zL'Lbv'aaa
7177   // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
7178   int byte4 = (_attributes->is_no_reg_mask()) ?
7179               0 :
7180               _attributes->get_embedded_opmask_register_specifier();
7181   // EVEX.v` for extending EVEX.vvvv or VIDX
7182   byte4 |= (evex_v ? 0: EVEX_V);
7183   // third EXEC.b for broadcast actions
7184   byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
7185   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
7186   byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
7187   // last is EVEX.z for zero/merge actions
7188   byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
7189   emit_int8(byte4);
7190 }
7191 
7192 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7193   bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
7194   bool vex_b = adr.base_needs_rex();
7195   bool vex_x = adr.index_needs_rex();
7196   set_attributes(attributes);
7197   attributes->set_current_assembler(this);
7198 
7199   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7200   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7201     switch (attributes->get_vector_len()) {
7202     case AVX_128bit:
7203     case AVX_256bit:
7204       attributes->set_is_legacy_mode();
7205       break;
7206     }
7207   }
7208 
7209   // For pure EVEX check and see if this instruction
7210   // is allowed in legacy mode and has resources which will
7211   // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
7212   // else that field is set when we encode to EVEX
7213   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7214       !_is_managed && !attributes->is_evex_instruction()) {
7215     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7216       bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7217       if (check_register_bank) {
7218         // check nds_enc and xreg_enc for upper bank usage
7219         if (nds_enc < 16 && xreg_enc < 16) {
7220           attributes->set_is_legacy_mode();
7221         }
7222       } else {
7223         attributes->set_is_legacy_mode();
7224       }
7225     }
7226   }
7227 
7228   _is_managed = false;
7229   if (UseAVX > 2 && !attributes->is_legacy_mode())
7230   {
7231     bool evex_r = (xreg_enc >= 16);
7232     bool evex_v = (nds_enc >= 16);
7233     attributes->set_is_evex_instruction();
7234     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7235   } else {
7236     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7237       attributes->set_rex_vex_w(false);
7238     }
7239     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7240   }
7241 }
7242 
7243 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7244   bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
7245   bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
7246   bool vex_x = false;
7247   set_attributes(attributes);
7248   attributes->set_current_assembler(this);
7249   bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7250 
7251   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7252   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7253     switch (attributes->get_vector_len()) {
7254     case AVX_128bit:
7255     case AVX_256bit:
7256       if (check_register_bank) {
7257         if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
7258           // up propagate arithmetic instructions to meet RA requirements
7259           attributes->set_vector_len(AVX_512bit);
7260         } else {
7261           attributes->set_is_legacy_mode();
7262         }
7263       } else {
7264         attributes->set_is_legacy_mode();
7265       }
7266       break;
7267     }
7268   }
7269 
7270   // For pure EVEX check and see if this instruction
7271   // is allowed in legacy mode and has resources which will
7272   // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
7273   // else that field is set when we encode to EVEX
7274   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7275       !_is_managed && !attributes->is_evex_instruction()) {
7276     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7277       if (check_register_bank) {
7278         // check dst_enc, nds_enc and src_enc for upper bank usage
7279         if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
7280           attributes->set_is_legacy_mode();
7281         }
7282       } else {
7283         attributes->set_is_legacy_mode();
7284       }
7285     }
7286   }
7287 
7288   _is_managed = false;
7289   if (UseAVX > 2 && !attributes->is_legacy_mode())
7290   {
7291     bool evex_r = (dst_enc >= 16);
7292     bool evex_v = (nds_enc >= 16);
7293     // can use vex_x as bank extender on rm encoding
7294     vex_x = (src_enc >= 16);
7295     attributes->set_is_evex_instruction();
7296     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7297   } else {
7298     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7299       attributes->set_rex_vex_w(false);
7300     }
7301     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7302   }
7303 
7304   // return modrm byte components for operands
7305   return (((dst_enc & 7) << 3) | (src_enc & 7));
7306 }
7307 
7308 
7309 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
7310                             VexOpcode opc, InstructionAttr *attributes) {
7311   if (UseAVX > 0) {
7312     int xreg_enc = xreg->encoding();
7313     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7314     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
7315   } else {
7316     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
7317     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
7318   }
7319 }
7320 
7321 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
7322                                       VexOpcode opc, InstructionAttr *attributes) {
7323   int dst_enc = dst->encoding();
7324   int src_enc = src->encoding();
7325   if (UseAVX > 0) {
7326     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7327     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
7328   } else {
7329     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
7330     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
7331   }
7332 }
7333 
7334 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7335   assert(VM_Version::supports_avx(), "");
7336   assert(!VM_Version::supports_evex(), "");
7337   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7338   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7339   emit_int8((unsigned char)0xC2);
7340   emit_int8((unsigned char)(0xC0 | encode));
7341   emit_int8((unsigned char)(0xF & cop));
7342 }
7343 
7344 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7345   assert(VM_Version::supports_avx(), "");
7346   assert(!VM_Version::supports_evex(), "");
7347   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7348   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7349   emit_int8((unsigned char)0x4B);
7350   emit_int8((unsigned char)(0xC0 | encode));
7351   int src2_enc = src2->encoding();
7352   emit_int8((unsigned char)(0xF0 & src2_enc<<4));
7353 }
7354 
7355 void Assembler::shlxl(Register dst, Register src1, Register src2) {
7356   assert(VM_Version::supports_bmi2(), "");
7357   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7358   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7359   emit_int8((unsigned char)0xF7);
7360   emit_int8((unsigned char)(0xC0 | encode));
7361 }
7362 
7363 void Assembler::shlxq(Register dst, Register src1, Register src2) {
7364   assert(VM_Version::supports_bmi2(), "");
7365   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7366   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7367   emit_int8((unsigned char)0xF7);
7368   emit_int8((unsigned char)(0xC0 | encode));
7369 }
7370 
7371 #ifndef _LP64
7372 
7373 void Assembler::incl(Register dst) {
7374   // Don't use it directly. Use MacroAssembler::incrementl() instead.
7375   emit_int8(0x40 | dst->encoding());
7376 }
7377 
7378 void Assembler::lea(Register dst, Address src) {
7379   leal(dst, src);
7380 }
7381 
7382 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
7383   InstructionMark im(this);
7384   emit_int8((unsigned char)0xC7);
7385   emit_operand(rax, dst);
7386   emit_data((int)imm32, rspec, 0);
7387 }
7388 
7389 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7390   InstructionMark im(this);
7391   int encode = prefix_and_encode(dst->encoding());
7392   emit_int8((unsigned char)(0xB8 | encode));
7393   emit_data((int)imm32, rspec, 0);
7394 }
7395 
7396 void Assembler::popa() { // 32bit
7397   emit_int8(0x61);
7398 }
7399 
7400 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
7401   InstructionMark im(this);
7402   emit_int8(0x68);
7403   emit_data(imm32, rspec, 0);
7404 }
7405 
7406 void Assembler::pusha() { // 32bit
7407   emit_int8(0x60);
7408 }
7409 
7410 void Assembler::set_byte_if_not_zero(Register dst) {
7411   emit_int8(0x0F);
7412   emit_int8((unsigned char)0x95);
7413   emit_int8((unsigned char)(0xE0 | dst->encoding()));
7414 }
7415 
7416 void Assembler::shldl(Register dst, Register src) {
7417   emit_int8(0x0F);
7418   emit_int8((unsigned char)0xA5);
7419   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7420 }
7421 
7422 // 0F A4 / r ib
7423 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
7424   emit_int8(0x0F);
7425   emit_int8((unsigned char)0xA4);
7426   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7427   emit_int8(imm8);
7428 }
7429 
7430 void Assembler::shrdl(Register dst, Register src) {
7431   emit_int8(0x0F);
7432   emit_int8((unsigned char)0xAD);
7433   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7434 }
7435 
7436 #else // LP64
7437 
7438 void Assembler::set_byte_if_not_zero(Register dst) {
7439   int enc = prefix_and_encode(dst->encoding(), true);
7440   emit_int8(0x0F);
7441   emit_int8((unsigned char)0x95);
7442   emit_int8((unsigned char)(0xE0 | enc));
7443 }
7444 
7445 // 64bit only pieces of the assembler
7446 // This should only be used by 64bit instructions that can use rip-relative
7447 // it cannot be used by instructions that want an immediate value.
7448 
7449 bool Assembler::reachable(AddressLiteral adr) {
7450   int64_t disp;
7451   // None will force a 64bit literal to the code stream. Likely a placeholder
7452   // for something that will be patched later and we need to certain it will
7453   // always be reachable.
7454   if (adr.reloc() == relocInfo::none) {
7455     return false;
7456   }
7457   if (adr.reloc() == relocInfo::internal_word_type) {
7458     // This should be rip relative and easily reachable.
7459     return true;
7460   }
7461   if (adr.reloc() == relocInfo::virtual_call_type ||
7462       adr.reloc() == relocInfo::opt_virtual_call_type ||
7463       adr.reloc() == relocInfo::static_call_type ||
7464       adr.reloc() == relocInfo::static_stub_type ) {
7465     // This should be rip relative within the code cache and easily
7466     // reachable until we get huge code caches. (At which point
7467     // ic code is going to have issues).
7468     return true;
7469   }
7470   if (adr.reloc() != relocInfo::external_word_type &&
7471       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
7472       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
7473       adr.reloc() != relocInfo::runtime_call_type ) {
7474     return false;
7475   }
7476 
7477   // Stress the correction code
7478   if (ForceUnreachable) {
7479     // Must be runtimecall reloc, see if it is in the codecache
7480     // Flipping stuff in the codecache to be unreachable causes issues
7481     // with things like inline caches where the additional instructions
7482     // are not handled.
7483     if (CodeCache::find_blob(adr._target) == NULL) {
7484       return false;
7485     }
7486   }
7487   // For external_word_type/runtime_call_type if it is reachable from where we
7488   // are now (possibly a temp buffer) and where we might end up
7489   // anywhere in the codeCache then we are always reachable.
7490   // This would have to change if we ever save/restore shared code
7491   // to be more pessimistic.
7492   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
7493   if (!is_simm32(disp)) return false;
7494   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
7495   if (!is_simm32(disp)) return false;
7496 
7497   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
7498 
7499   // Because rip relative is a disp + address_of_next_instruction and we
7500   // don't know the value of address_of_next_instruction we apply a fudge factor
7501   // to make sure we will be ok no matter the size of the instruction we get placed into.
7502   // We don't have to fudge the checks above here because they are already worst case.
7503 
7504   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
7505   // + 4 because better safe than sorry.
7506   const int fudge = 12 + 4;
7507   if (disp < 0) {
7508     disp -= fudge;
7509   } else {
7510     disp += fudge;
7511   }
7512   return is_simm32(disp);
7513 }
7514 
7515 // Check if the polling page is not reachable from the code cache using rip-relative
7516 // addressing.
7517 bool Assembler::is_polling_page_far() {
7518   intptr_t addr = (intptr_t)os::get_polling_page();
7519   return ForceUnreachable ||
7520          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
7521          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
7522 }
7523 
7524 void Assembler::emit_data64(jlong data,
7525                             relocInfo::relocType rtype,
7526                             int format) {
7527   if (rtype == relocInfo::none) {
7528     emit_int64(data);
7529   } else {
7530     emit_data64(data, Relocation::spec_simple(rtype), format);
7531   }
7532 }
7533 
7534 void Assembler::emit_data64(jlong data,
7535                             RelocationHolder const& rspec,
7536                             int format) {
7537   assert(imm_operand == 0, "default format must be immediate in this file");
7538   assert(imm_operand == format, "must be immediate");
7539   assert(inst_mark() != NULL, "must be inside InstructionMark");
7540   // Do not use AbstractAssembler::relocate, which is not intended for
7541   // embedded words.  Instead, relocate to the enclosing instruction.
7542   code_section()->relocate(inst_mark(), rspec, format);
7543 #ifdef ASSERT
7544   check_relocation(rspec, format);
7545 #endif
7546   emit_int64(data);
7547 }
7548 
7549 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
7550   if (reg_enc >= 8) {
7551     prefix(REX_B);
7552     reg_enc -= 8;
7553   } else if (byteinst && reg_enc >= 4) {
7554     prefix(REX);
7555   }
7556   return reg_enc;
7557 }
7558 
7559 int Assembler::prefixq_and_encode(int reg_enc) {
7560   if (reg_enc < 8) {
7561     prefix(REX_W);
7562   } else {
7563     prefix(REX_WB);
7564     reg_enc -= 8;
7565   }
7566   return reg_enc;
7567 }
7568 
7569 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
7570   if (dst_enc < 8) {
7571     if (src_enc >= 8) {
7572       prefix(REX_B);
7573       src_enc -= 8;
7574     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
7575       prefix(REX);
7576     }
7577   } else {
7578     if (src_enc < 8) {
7579       prefix(REX_R);
7580     } else {
7581       prefix(REX_RB);
7582       src_enc -= 8;
7583     }
7584     dst_enc -= 8;
7585   }
7586   return dst_enc << 3 | src_enc;
7587 }
7588 
7589 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
7590   if (dst_enc < 8) {
7591     if (src_enc < 8) {
7592       prefix(REX_W);
7593     } else {
7594       prefix(REX_WB);
7595       src_enc -= 8;
7596     }
7597   } else {
7598     if (src_enc < 8) {
7599       prefix(REX_WR);
7600     } else {
7601       prefix(REX_WRB);
7602       src_enc -= 8;
7603     }
7604     dst_enc -= 8;
7605   }
7606   return dst_enc << 3 | src_enc;
7607 }
7608 
7609 void Assembler::prefix(Register reg) {
7610   if (reg->encoding() >= 8) {
7611     prefix(REX_B);
7612   }
7613 }
7614 
7615 void Assembler::prefix(Register dst, Register src, Prefix p) {
7616   if (src->encoding() >= 8) {
7617     p = (Prefix)(p | REX_B);
7618   }
7619   if (dst->encoding() >= 8) {
7620     p = (Prefix)( p | REX_R);
7621   }
7622   if (p != Prefix_EMPTY) {
7623     // do not generate an empty prefix
7624     prefix(p);
7625   }
7626 }
7627 
7628 void Assembler::prefix(Register dst, Address adr, Prefix p) {
7629   if (adr.base_needs_rex()) {
7630     if (adr.index_needs_rex()) {
7631       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7632     } else {
7633       prefix(REX_B);
7634     }
7635   } else {
7636     if (adr.index_needs_rex()) {
7637       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7638     }
7639   }
7640   if (dst->encoding() >= 8) {
7641     p = (Prefix)(p | REX_R);
7642   }
7643   if (p != Prefix_EMPTY) {
7644     // do not generate an empty prefix
7645     prefix(p);
7646   }
7647 }
7648 
7649 void Assembler::prefix(Address adr) {
7650   if (adr.base_needs_rex()) {
7651     if (adr.index_needs_rex()) {
7652       prefix(REX_XB);
7653     } else {
7654       prefix(REX_B);
7655     }
7656   } else {
7657     if (adr.index_needs_rex()) {
7658       prefix(REX_X);
7659     }
7660   }
7661 }
7662 
7663 void Assembler::prefixq(Address adr) {
7664   if (adr.base_needs_rex()) {
7665     if (adr.index_needs_rex()) {
7666       prefix(REX_WXB);
7667     } else {
7668       prefix(REX_WB);
7669     }
7670   } else {
7671     if (adr.index_needs_rex()) {
7672       prefix(REX_WX);
7673     } else {
7674       prefix(REX_W);
7675     }
7676   }
7677 }
7678 
7679 
7680 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
7681   if (reg->encoding() < 8) {
7682     if (adr.base_needs_rex()) {
7683       if (adr.index_needs_rex()) {
7684         prefix(REX_XB);
7685       } else {
7686         prefix(REX_B);
7687       }
7688     } else {
7689       if (adr.index_needs_rex()) {
7690         prefix(REX_X);
7691       } else if (byteinst && reg->encoding() >= 4 ) {
7692         prefix(REX);
7693       }
7694     }
7695   } else {
7696     if (adr.base_needs_rex()) {
7697       if (adr.index_needs_rex()) {
7698         prefix(REX_RXB);
7699       } else {
7700         prefix(REX_RB);
7701       }
7702     } else {
7703       if (adr.index_needs_rex()) {
7704         prefix(REX_RX);
7705       } else {
7706         prefix(REX_R);
7707       }
7708     }
7709   }
7710 }
7711 
7712 void Assembler::prefixq(Address adr, Register src) {
7713   if (src->encoding() < 8) {
7714     if (adr.base_needs_rex()) {
7715       if (adr.index_needs_rex()) {
7716         prefix(REX_WXB);
7717       } else {
7718         prefix(REX_WB);
7719       }
7720     } else {
7721       if (adr.index_needs_rex()) {
7722         prefix(REX_WX);
7723       } else {
7724         prefix(REX_W);
7725       }
7726     }
7727   } else {
7728     if (adr.base_needs_rex()) {
7729       if (adr.index_needs_rex()) {
7730         prefix(REX_WRXB);
7731       } else {
7732         prefix(REX_WRB);
7733       }
7734     } else {
7735       if (adr.index_needs_rex()) {
7736         prefix(REX_WRX);
7737       } else {
7738         prefix(REX_WR);
7739       }
7740     }
7741   }
7742 }
7743 
7744 void Assembler::prefix(Address adr, XMMRegister reg) {
7745   if (reg->encoding() < 8) {
7746     if (adr.base_needs_rex()) {
7747       if (adr.index_needs_rex()) {
7748         prefix(REX_XB);
7749       } else {
7750         prefix(REX_B);
7751       }
7752     } else {
7753       if (adr.index_needs_rex()) {
7754         prefix(REX_X);
7755       }
7756     }
7757   } else {
7758     if (adr.base_needs_rex()) {
7759       if (adr.index_needs_rex()) {
7760         prefix(REX_RXB);
7761       } else {
7762         prefix(REX_RB);
7763       }
7764     } else {
7765       if (adr.index_needs_rex()) {
7766         prefix(REX_RX);
7767       } else {
7768         prefix(REX_R);
7769       }
7770     }
7771   }
7772 }
7773 
7774 void Assembler::prefixq(Address adr, XMMRegister src) {
7775   if (src->encoding() < 8) {
7776     if (adr.base_needs_rex()) {
7777       if (adr.index_needs_rex()) {
7778         prefix(REX_WXB);
7779       } else {
7780         prefix(REX_WB);
7781       }
7782     } else {
7783       if (adr.index_needs_rex()) {
7784         prefix(REX_WX);
7785       } else {
7786         prefix(REX_W);
7787       }
7788     }
7789   } else {
7790     if (adr.base_needs_rex()) {
7791       if (adr.index_needs_rex()) {
7792         prefix(REX_WRXB);
7793       } else {
7794         prefix(REX_WRB);
7795       }
7796     } else {
7797       if (adr.index_needs_rex()) {
7798         prefix(REX_WRX);
7799       } else {
7800         prefix(REX_WR);
7801       }
7802     }
7803   }
7804 }
7805 
7806 void Assembler::adcq(Register dst, int32_t imm32) {
7807   (void) prefixq_and_encode(dst->encoding());
7808   emit_arith(0x81, 0xD0, dst, imm32);
7809 }
7810 
7811 void Assembler::adcq(Register dst, Address src) {
7812   InstructionMark im(this);
7813   prefixq(src, dst);
7814   emit_int8(0x13);
7815   emit_operand(dst, src);
7816 }
7817 
7818 void Assembler::adcq(Register dst, Register src) {
7819   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7820   emit_arith(0x13, 0xC0, dst, src);
7821 }
7822 
7823 void Assembler::addq(Address dst, int32_t imm32) {
7824   InstructionMark im(this);
7825   prefixq(dst);
7826   emit_arith_operand(0x81, rax, dst,imm32);
7827 }
7828 
7829 void Assembler::addq(Address dst, Register src) {
7830   InstructionMark im(this);
7831   prefixq(dst, src);
7832   emit_int8(0x01);
7833   emit_operand(src, dst);
7834 }
7835 
7836 void Assembler::addq(Register dst, int32_t imm32) {
7837   (void) prefixq_and_encode(dst->encoding());
7838   emit_arith(0x81, 0xC0, dst, imm32);
7839 }
7840 
7841 void Assembler::addq(Register dst, Address src) {
7842   InstructionMark im(this);
7843   prefixq(src, dst);
7844   emit_int8(0x03);
7845   emit_operand(dst, src);
7846 }
7847 
7848 void Assembler::addq(Register dst, Register src) {
7849   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7850   emit_arith(0x03, 0xC0, dst, src);
7851 }
7852 
7853 void Assembler::adcxq(Register dst, Register src) {
7854   //assert(VM_Version::supports_adx(), "adx instructions not supported");
7855   emit_int8((unsigned char)0x66);
7856   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7857   emit_int8(0x0F);
7858   emit_int8(0x38);
7859   emit_int8((unsigned char)0xF6);
7860   emit_int8((unsigned char)(0xC0 | encode));
7861 }
7862 
7863 void Assembler::adoxq(Register dst, Register src) {
7864   //assert(VM_Version::supports_adx(), "adx instructions not supported");
7865   emit_int8((unsigned char)0xF3);
7866   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7867   emit_int8(0x0F);
7868   emit_int8(0x38);
7869   emit_int8((unsigned char)0xF6);
7870   emit_int8((unsigned char)(0xC0 | encode));
7871 }
7872 
7873 void Assembler::andq(Address dst, int32_t imm32) {
7874   InstructionMark im(this);
7875   prefixq(dst);
7876   emit_int8((unsigned char)0x81);
7877   emit_operand(rsp, dst, 4);
7878   emit_int32(imm32);
7879 }
7880 
7881 void Assembler::andq(Register dst, int32_t imm32) {
7882   (void) prefixq_and_encode(dst->encoding());
7883   emit_arith(0x81, 0xE0, dst, imm32);
7884 }
7885 
7886 void Assembler::andq(Register dst, Address src) {
7887   InstructionMark im(this);
7888   prefixq(src, dst);
7889   emit_int8(0x23);
7890   emit_operand(dst, src);
7891 }
7892 
7893 void Assembler::andq(Register dst, Register src) {
7894   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7895   emit_arith(0x23, 0xC0, dst, src);
7896 }
7897 
7898 void Assembler::andnq(Register dst, Register src1, Register src2) {
7899   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7900   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7901   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7902   emit_int8((unsigned char)0xF2);
7903   emit_int8((unsigned char)(0xC0 | encode));
7904 }
7905 
7906 void Assembler::andnq(Register dst, Register src1, Address src2) {
7907   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7908   InstructionMark im(this);
7909   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7910   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7911   emit_int8((unsigned char)0xF2);
7912   emit_operand(dst, src2);
7913 }
7914 
7915 void Assembler::bsfq(Register dst, Register src) {
7916   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7917   emit_int8(0x0F);
7918   emit_int8((unsigned char)0xBC);
7919   emit_int8((unsigned char)(0xC0 | encode));
7920 }
7921 
7922 void Assembler::bsrq(Register dst, Register src) {
7923   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7924   emit_int8(0x0F);
7925   emit_int8((unsigned char)0xBD);
7926   emit_int8((unsigned char)(0xC0 | encode));
7927 }
7928 
7929 void Assembler::bswapq(Register reg) {
7930   int encode = prefixq_and_encode(reg->encoding());
7931   emit_int8(0x0F);
7932   emit_int8((unsigned char)(0xC8 | encode));
7933 }
7934 
7935 void Assembler::blsiq(Register dst, Register src) {
7936   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7937   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7938   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7939   emit_int8((unsigned char)0xF3);
7940   emit_int8((unsigned char)(0xC0 | encode));
7941 }
7942 
7943 void Assembler::blsiq(Register dst, Address src) {
7944   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7945   InstructionMark im(this);
7946   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7947   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7948   emit_int8((unsigned char)0xF3);
7949   emit_operand(rbx, src);
7950 }
7951 
7952 void Assembler::blsmskq(Register dst, Register src) {
7953   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7954   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7955   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7956   emit_int8((unsigned char)0xF3);
7957   emit_int8((unsigned char)(0xC0 | encode));
7958 }
7959 
7960 void Assembler::blsmskq(Register dst, Address src) {
7961   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7962   InstructionMark im(this);
7963   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7964   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7965   emit_int8((unsigned char)0xF3);
7966   emit_operand(rdx, src);
7967 }
7968 
7969 void Assembler::blsrq(Register dst, Register src) {
7970   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7971   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7972   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7973   emit_int8((unsigned char)0xF3);
7974   emit_int8((unsigned char)(0xC0 | encode));
7975 }
7976 
7977 void Assembler::blsrq(Register dst, Address src) {
7978   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7979   InstructionMark im(this);
7980   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7981   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7982   emit_int8((unsigned char)0xF3);
7983   emit_operand(rcx, src);
7984 }
7985 
7986 void Assembler::cdqq() {
7987   prefix(REX_W);
7988   emit_int8((unsigned char)0x99);
7989 }
7990 
7991 void Assembler::clflush(Address adr) {
7992   prefix(adr);
7993   emit_int8(0x0F);
7994   emit_int8((unsigned char)0xAE);
7995   emit_operand(rdi, adr);
7996 }
7997 
7998 void Assembler::cmovq(Condition cc, Register dst, Register src) {
7999   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8000   emit_int8(0x0F);
8001   emit_int8(0x40 | cc);
8002   emit_int8((unsigned char)(0xC0 | encode));
8003 }
8004 
8005 void Assembler::cmovq(Condition cc, Register dst, Address src) {
8006   InstructionMark im(this);
8007   prefixq(src, dst);
8008   emit_int8(0x0F);
8009   emit_int8(0x40 | cc);
8010   emit_operand(dst, src);
8011 }
8012 
8013 void Assembler::cmpq(Address dst, int32_t imm32) {
8014   InstructionMark im(this);
8015   prefixq(dst);
8016   emit_int8((unsigned char)0x81);
8017   emit_operand(rdi, dst, 4);
8018   emit_int32(imm32);
8019 }
8020 
8021 void Assembler::cmpq(Register dst, int32_t imm32) {
8022   (void) prefixq_and_encode(dst->encoding());
8023   emit_arith(0x81, 0xF8, dst, imm32);
8024 }
8025 
8026 void Assembler::cmpq(Address dst, Register src) {
8027   InstructionMark im(this);
8028   prefixq(dst, src);
8029   emit_int8(0x3B);
8030   emit_operand(src, dst);
8031 }
8032 
8033 void Assembler::cmpq(Register dst, Register src) {
8034   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8035   emit_arith(0x3B, 0xC0, dst, src);
8036 }
8037 
8038 void Assembler::cmpq(Register dst, Address  src) {
8039   InstructionMark im(this);
8040   prefixq(src, dst);
8041   emit_int8(0x3B);
8042   emit_operand(dst, src);
8043 }
8044 
8045 void Assembler::cmpxchgq(Register reg, Address adr) {
8046   InstructionMark im(this);
8047   prefixq(adr, reg);
8048   emit_int8(0x0F);
8049   emit_int8((unsigned char)0xB1);
8050   emit_operand(reg, adr);
8051 }
8052 
8053 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
8054   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8055   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8056   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8057   emit_int8(0x2A);
8058   emit_int8((unsigned char)(0xC0 | encode));
8059 }
8060 
8061 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
8062   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8063   InstructionMark im(this);
8064   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8065   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8066   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8067   emit_int8(0x2A);
8068   emit_operand(dst, src);
8069 }
8070 
8071 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
8072   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8073   InstructionMark im(this);
8074   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8075   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8076   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8077   emit_int8(0x2A);
8078   emit_operand(dst, src);
8079 }
8080 
8081 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
8082   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8083   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8084   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8085   emit_int8(0x2C);
8086   emit_int8((unsigned char)(0xC0 | encode));
8087 }
8088 
8089 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
8090   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8091   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8092   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8093   emit_int8(0x2C);
8094   emit_int8((unsigned char)(0xC0 | encode));
8095 }
8096 
8097 void Assembler::decl(Register dst) {
8098   // Don't use it directly. Use MacroAssembler::decrementl() instead.
8099   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8100   int encode = prefix_and_encode(dst->encoding());
8101   emit_int8((unsigned char)0xFF);
8102   emit_int8((unsigned char)(0xC8 | encode));
8103 }
8104 
8105 void Assembler::decq(Register dst) {
8106   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8107   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8108   int encode = prefixq_and_encode(dst->encoding());
8109   emit_int8((unsigned char)0xFF);
8110   emit_int8(0xC8 | encode);
8111 }
8112 
8113 void Assembler::decq(Address dst) {
8114   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8115   InstructionMark im(this);
8116   prefixq(dst);
8117   emit_int8((unsigned char)0xFF);
8118   emit_operand(rcx, dst);
8119 }
8120 
8121 void Assembler::fxrstor(Address src) {
8122   prefixq(src);
8123   emit_int8(0x0F);
8124   emit_int8((unsigned char)0xAE);
8125   emit_operand(as_Register(1), src);
8126 }
8127 
8128 void Assembler::xrstor(Address src) {
8129   prefixq(src);
8130   emit_int8(0x0F);
8131   emit_int8((unsigned char)0xAE);
8132   emit_operand(as_Register(5), src);
8133 }
8134 
8135 void Assembler::fxsave(Address dst) {
8136   prefixq(dst);
8137   emit_int8(0x0F);
8138   emit_int8((unsigned char)0xAE);
8139   emit_operand(as_Register(0), dst);
8140 }
8141 
8142 void Assembler::xsave(Address dst) {
8143   prefixq(dst);
8144   emit_int8(0x0F);
8145   emit_int8((unsigned char)0xAE);
8146   emit_operand(as_Register(4), dst);
8147 }
8148 
8149 void Assembler::idivq(Register src) {
8150   int encode = prefixq_and_encode(src->encoding());
8151   emit_int8((unsigned char)0xF7);
8152   emit_int8((unsigned char)(0xF8 | encode));
8153 }
8154 
8155 void Assembler::imulq(Register dst, Register src) {
8156   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8157   emit_int8(0x0F);
8158   emit_int8((unsigned char)0xAF);
8159   emit_int8((unsigned char)(0xC0 | encode));
8160 }
8161 
8162 void Assembler::imulq(Register dst, Register src, int value) {
8163   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8164   if (is8bit(value)) {
8165     emit_int8(0x6B);
8166     emit_int8((unsigned char)(0xC0 | encode));
8167     emit_int8(value & 0xFF);
8168   } else {
8169     emit_int8(0x69);
8170     emit_int8((unsigned char)(0xC0 | encode));
8171     emit_int32(value);
8172   }
8173 }
8174 
8175 void Assembler::imulq(Register dst, Address src) {
8176   InstructionMark im(this);
8177   prefixq(src, dst);
8178   emit_int8(0x0F);
8179   emit_int8((unsigned char) 0xAF);
8180   emit_operand(dst, src);
8181 }
8182 
8183 void Assembler::incl(Register dst) {
8184   // Don't use it directly. Use MacroAssembler::incrementl() instead.
8185   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8186   int encode = prefix_and_encode(dst->encoding());
8187   emit_int8((unsigned char)0xFF);
8188   emit_int8((unsigned char)(0xC0 | encode));
8189 }
8190 
8191 void Assembler::incq(Register dst) {
8192   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8193   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8194   int encode = prefixq_and_encode(dst->encoding());
8195   emit_int8((unsigned char)0xFF);
8196   emit_int8((unsigned char)(0xC0 | encode));
8197 }
8198 
8199 void Assembler::incq(Address dst) {
8200   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8201   InstructionMark im(this);
8202   prefixq(dst);
8203   emit_int8((unsigned char)0xFF);
8204   emit_operand(rax, dst);
8205 }
8206 
8207 void Assembler::lea(Register dst, Address src) {
8208   leaq(dst, src);
8209 }
8210 
8211 void Assembler::leaq(Register dst, Address src) {
8212   InstructionMark im(this);
8213   prefixq(src, dst);
8214   emit_int8((unsigned char)0x8D);
8215   emit_operand(dst, src);
8216 }
8217 
8218 void Assembler::mov64(Register dst, int64_t imm64) {
8219   InstructionMark im(this);
8220   int encode = prefixq_and_encode(dst->encoding());
8221   emit_int8((unsigned char)(0xB8 | encode));
8222   emit_int64(imm64);
8223 }
8224 
8225 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
8226   InstructionMark im(this);
8227   int encode = prefixq_and_encode(dst->encoding());
8228   emit_int8(0xB8 | encode);
8229   emit_data64(imm64, rspec);
8230 }
8231 
8232 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
8233   InstructionMark im(this);
8234   int encode = prefix_and_encode(dst->encoding());
8235   emit_int8((unsigned char)(0xB8 | encode));
8236   emit_data((int)imm32, rspec, narrow_oop_operand);
8237 }
8238 
8239 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
8240   InstructionMark im(this);
8241   prefix(dst);
8242   emit_int8((unsigned char)0xC7);
8243   emit_operand(rax, dst, 4);
8244   emit_data((int)imm32, rspec, narrow_oop_operand);
8245 }
8246 
8247 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
8248   InstructionMark im(this);
8249   int encode = prefix_and_encode(src1->encoding());
8250   emit_int8((unsigned char)0x81);
8251   emit_int8((unsigned char)(0xF8 | encode));
8252   emit_data((int)imm32, rspec, narrow_oop_operand);
8253 }
8254 
8255 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
8256   InstructionMark im(this);
8257   prefix(src1);
8258   emit_int8((unsigned char)0x81);
8259   emit_operand(rax, src1, 4);
8260   emit_data((int)imm32, rspec, narrow_oop_operand);
8261 }
8262 
8263 void Assembler::lzcntq(Register dst, Register src) {
8264   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
8265   emit_int8((unsigned char)0xF3);
8266   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8267   emit_int8(0x0F);
8268   emit_int8((unsigned char)0xBD);
8269   emit_int8((unsigned char)(0xC0 | encode));
8270 }
8271 
8272 void Assembler::movdq(XMMRegister dst, Register src) {
8273   // table D-1 says MMX/SSE2
8274   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8275   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8276   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8277   emit_int8(0x6E);
8278   emit_int8((unsigned char)(0xC0 | encode));
8279 }
8280 
8281 void Assembler::movdq(Register dst, XMMRegister src) {
8282   // table D-1 says MMX/SSE2
8283   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8284   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8285   // swap src/dst to get correct prefix
8286   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8287   emit_int8(0x7E);
8288   emit_int8((unsigned char)(0xC0 | encode));
8289 }
8290 
8291 void Assembler::movq(Register dst, Register src) {
8292   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8293   emit_int8((unsigned char)0x8B);
8294   emit_int8((unsigned char)(0xC0 | encode));
8295 }
8296 
8297 void Assembler::movq(Register dst, Address src) {
8298   InstructionMark im(this);
8299   prefixq(src, dst);
8300   emit_int8((unsigned char)0x8B);
8301   emit_operand(dst, src);
8302 }
8303 
8304 void Assembler::movq(Address dst, Register src) {
8305   InstructionMark im(this);
8306   prefixq(dst, src);
8307   emit_int8((unsigned char)0x89);
8308   emit_operand(src, dst);
8309 }
8310 
8311 void Assembler::movsbq(Register dst, Address src) {
8312   InstructionMark im(this);
8313   prefixq(src, dst);
8314   emit_int8(0x0F);
8315   emit_int8((unsigned char)0xBE);
8316   emit_operand(dst, src);
8317 }
8318 
8319 void Assembler::movsbq(Register dst, Register src) {
8320   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8321   emit_int8(0x0F);
8322   emit_int8((unsigned char)0xBE);
8323   emit_int8((unsigned char)(0xC0 | encode));
8324 }
8325 
8326 void Assembler::movslq(Register dst, int32_t imm32) {
8327   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
8328   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
8329   // as a result we shouldn't use until tested at runtime...
8330   ShouldNotReachHere();
8331   InstructionMark im(this);
8332   int encode = prefixq_and_encode(dst->encoding());
8333   emit_int8((unsigned char)(0xC7 | encode));
8334   emit_int32(imm32);
8335 }
8336 
8337 void Assembler::movslq(Address dst, int32_t imm32) {
8338   assert(is_simm32(imm32), "lost bits");
8339   InstructionMark im(this);
8340   prefixq(dst);
8341   emit_int8((unsigned char)0xC7);
8342   emit_operand(rax, dst, 4);
8343   emit_int32(imm32);
8344 }
8345 
8346 void Assembler::movslq(Register dst, Address src) {
8347   InstructionMark im(this);
8348   prefixq(src, dst);
8349   emit_int8(0x63);
8350   emit_operand(dst, src);
8351 }
8352 
8353 void Assembler::movslq(Register dst, Register src) {
8354   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8355   emit_int8(0x63);
8356   emit_int8((unsigned char)(0xC0 | encode));
8357 }
8358 
8359 void Assembler::movswq(Register dst, Address src) {
8360   InstructionMark im(this);
8361   prefixq(src, dst);
8362   emit_int8(0x0F);
8363   emit_int8((unsigned char)0xBF);
8364   emit_operand(dst, src);
8365 }
8366 
8367 void Assembler::movswq(Register dst, Register src) {
8368   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8369   emit_int8((unsigned char)0x0F);
8370   emit_int8((unsigned char)0xBF);
8371   emit_int8((unsigned char)(0xC0 | encode));
8372 }
8373 
8374 void Assembler::movzbq(Register dst, Address src) {
8375   InstructionMark im(this);
8376   prefixq(src, dst);
8377   emit_int8((unsigned char)0x0F);
8378   emit_int8((unsigned char)0xB6);
8379   emit_operand(dst, src);
8380 }
8381 
8382 void Assembler::movzbq(Register dst, Register src) {
8383   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8384   emit_int8(0x0F);
8385   emit_int8((unsigned char)0xB6);
8386   emit_int8(0xC0 | encode);
8387 }
8388 
8389 void Assembler::movzwq(Register dst, Address src) {
8390   InstructionMark im(this);
8391   prefixq(src, dst);
8392   emit_int8((unsigned char)0x0F);
8393   emit_int8((unsigned char)0xB7);
8394   emit_operand(dst, src);
8395 }
8396 
8397 void Assembler::movzwq(Register dst, Register src) {
8398   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8399   emit_int8((unsigned char)0x0F);
8400   emit_int8((unsigned char)0xB7);
8401   emit_int8((unsigned char)(0xC0 | encode));
8402 }
8403 
8404 void Assembler::mulq(Address src) {
8405   InstructionMark im(this);
8406   prefixq(src);
8407   emit_int8((unsigned char)0xF7);
8408   emit_operand(rsp, src);
8409 }
8410 
8411 void Assembler::mulq(Register src) {
8412   int encode = prefixq_and_encode(src->encoding());
8413   emit_int8((unsigned char)0xF7);
8414   emit_int8((unsigned char)(0xE0 | encode));
8415 }
8416 
8417 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
8418   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8419   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8420   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
8421   emit_int8((unsigned char)0xF6);
8422   emit_int8((unsigned char)(0xC0 | encode));
8423 }
8424 
8425 void Assembler::negq(Register dst) {
8426   int encode = prefixq_and_encode(dst->encoding());
8427   emit_int8((unsigned char)0xF7);
8428   emit_int8((unsigned char)(0xD8 | encode));
8429 }
8430 
8431 void Assembler::notq(Register dst) {
8432   int encode = prefixq_and_encode(dst->encoding());
8433   emit_int8((unsigned char)0xF7);
8434   emit_int8((unsigned char)(0xD0 | encode));
8435 }
8436 
8437 void Assembler::orq(Address dst, int32_t imm32) {
8438   InstructionMark im(this);
8439   prefixq(dst);
8440   emit_int8((unsigned char)0x81);
8441   emit_operand(rcx, dst, 4);
8442   emit_int32(imm32);
8443 }
8444 
8445 void Assembler::orq(Register dst, int32_t imm32) {
8446   (void) prefixq_and_encode(dst->encoding());
8447   emit_arith(0x81, 0xC8, dst, imm32);
8448 }
8449 
8450 void Assembler::orq(Register dst, Address src) {
8451   InstructionMark im(this);
8452   prefixq(src, dst);
8453   emit_int8(0x0B);
8454   emit_operand(dst, src);
8455 }
8456 
8457 void Assembler::orq(Register dst, Register src) {
8458   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8459   emit_arith(0x0B, 0xC0, dst, src);
8460 }
8461 
8462 void Assembler::popa() { // 64bit
8463   movq(r15, Address(rsp, 0));
8464   movq(r14, Address(rsp, wordSize));
8465   movq(r13, Address(rsp, 2 * wordSize));
8466   movq(r12, Address(rsp, 3 * wordSize));
8467   movq(r11, Address(rsp, 4 * wordSize));
8468   movq(r10, Address(rsp, 5 * wordSize));
8469   movq(r9,  Address(rsp, 6 * wordSize));
8470   movq(r8,  Address(rsp, 7 * wordSize));
8471   movq(rdi, Address(rsp, 8 * wordSize));
8472   movq(rsi, Address(rsp, 9 * wordSize));
8473   movq(rbp, Address(rsp, 10 * wordSize));
8474   // skip rsp
8475   movq(rbx, Address(rsp, 12 * wordSize));
8476   movq(rdx, Address(rsp, 13 * wordSize));
8477   movq(rcx, Address(rsp, 14 * wordSize));
8478   movq(rax, Address(rsp, 15 * wordSize));
8479 
8480   addq(rsp, 16 * wordSize);
8481 }
8482 
8483 void Assembler::popcntq(Register dst, Address src) {
8484   assert(VM_Version::supports_popcnt(), "must support");
8485   InstructionMark im(this);
8486   emit_int8((unsigned char)0xF3);
8487   prefixq(src, dst);
8488   emit_int8((unsigned char)0x0F);
8489   emit_int8((unsigned char)0xB8);
8490   emit_operand(dst, src);
8491 }
8492 
8493 void Assembler::popcntq(Register dst, Register src) {
8494   assert(VM_Version::supports_popcnt(), "must support");
8495   emit_int8((unsigned char)0xF3);
8496   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8497   emit_int8((unsigned char)0x0F);
8498   emit_int8((unsigned char)0xB8);
8499   emit_int8((unsigned char)(0xC0 | encode));
8500 }
8501 
8502 void Assembler::popq(Address dst) {
8503   InstructionMark im(this);
8504   prefixq(dst);
8505   emit_int8((unsigned char)0x8F);
8506   emit_operand(rax, dst);
8507 }
8508 
8509 void Assembler::pusha() { // 64bit
8510   // we have to store original rsp.  ABI says that 128 bytes
8511   // below rsp are local scratch.
8512   movq(Address(rsp, -5 * wordSize), rsp);
8513 
8514   subq(rsp, 16 * wordSize);
8515 
8516   movq(Address(rsp, 15 * wordSize), rax);
8517   movq(Address(rsp, 14 * wordSize), rcx);
8518   movq(Address(rsp, 13 * wordSize), rdx);
8519   movq(Address(rsp, 12 * wordSize), rbx);
8520   // skip rsp
8521   movq(Address(rsp, 10 * wordSize), rbp);
8522   movq(Address(rsp, 9 * wordSize), rsi);
8523   movq(Address(rsp, 8 * wordSize), rdi);
8524   movq(Address(rsp, 7 * wordSize), r8);
8525   movq(Address(rsp, 6 * wordSize), r9);
8526   movq(Address(rsp, 5 * wordSize), r10);
8527   movq(Address(rsp, 4 * wordSize), r11);
8528   movq(Address(rsp, 3 * wordSize), r12);
8529   movq(Address(rsp, 2 * wordSize), r13);
8530   movq(Address(rsp, wordSize), r14);
8531   movq(Address(rsp, 0), r15);
8532 }
8533 
8534 void Assembler::pushq(Address src) {
8535   InstructionMark im(this);
8536   prefixq(src);
8537   emit_int8((unsigned char)0xFF);
8538   emit_operand(rsi, src);
8539 }
8540 
8541 void Assembler::rclq(Register dst, int imm8) {
8542   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8543   int encode = prefixq_and_encode(dst->encoding());
8544   if (imm8 == 1) {
8545     emit_int8((unsigned char)0xD1);
8546     emit_int8((unsigned char)(0xD0 | encode));
8547   } else {
8548     emit_int8((unsigned char)0xC1);
8549     emit_int8((unsigned char)(0xD0 | encode));
8550     emit_int8(imm8);
8551   }
8552 }
8553 
8554 void Assembler::rcrq(Register dst, int imm8) {
8555   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8556   int encode = prefixq_and_encode(dst->encoding());
8557   if (imm8 == 1) {
8558     emit_int8((unsigned char)0xD1);
8559     emit_int8((unsigned char)(0xD8 | encode));
8560   } else {
8561     emit_int8((unsigned char)0xC1);
8562     emit_int8((unsigned char)(0xD8 | encode));
8563     emit_int8(imm8);
8564   }
8565 }
8566 
8567 void Assembler::rorq(Register dst, int imm8) {
8568   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8569   int encode = prefixq_and_encode(dst->encoding());
8570   if (imm8 == 1) {
8571     emit_int8((unsigned char)0xD1);
8572     emit_int8((unsigned char)(0xC8 | encode));
8573   } else {
8574     emit_int8((unsigned char)0xC1);
8575     emit_int8((unsigned char)(0xc8 | encode));
8576     emit_int8(imm8);
8577   }
8578 }
8579 
8580 void Assembler::rorxq(Register dst, Register src, int imm8) {
8581   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8582   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8583   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8584   emit_int8((unsigned char)0xF0);
8585   emit_int8((unsigned char)(0xC0 | encode));
8586   emit_int8(imm8);
8587 }
8588 
8589 void Assembler::rorxd(Register dst, Register src, int imm8) {
8590   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8591   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8592   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8593   emit_int8((unsigned char)0xF0);
8594   emit_int8((unsigned char)(0xC0 | encode));
8595   emit_int8(imm8);
8596 }
8597 
8598 void Assembler::sarq(Register dst, int imm8) {
8599   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8600   int encode = prefixq_and_encode(dst->encoding());
8601   if (imm8 == 1) {
8602     emit_int8((unsigned char)0xD1);
8603     emit_int8((unsigned char)(0xF8 | encode));
8604   } else {
8605     emit_int8((unsigned char)0xC1);
8606     emit_int8((unsigned char)(0xF8 | encode));
8607     emit_int8(imm8);
8608   }
8609 }
8610 
8611 void Assembler::sarq(Register dst) {
8612   int encode = prefixq_and_encode(dst->encoding());
8613   emit_int8((unsigned char)0xD3);
8614   emit_int8((unsigned char)(0xF8 | encode));
8615 }
8616 
8617 void Assembler::sbbq(Address dst, int32_t imm32) {
8618   InstructionMark im(this);
8619   prefixq(dst);
8620   emit_arith_operand(0x81, rbx, dst, imm32);
8621 }
8622 
8623 void Assembler::sbbq(Register dst, int32_t imm32) {
8624   (void) prefixq_and_encode(dst->encoding());
8625   emit_arith(0x81, 0xD8, dst, imm32);
8626 }
8627 
8628 void Assembler::sbbq(Register dst, Address src) {
8629   InstructionMark im(this);
8630   prefixq(src, dst);
8631   emit_int8(0x1B);
8632   emit_operand(dst, src);
8633 }
8634 
8635 void Assembler::sbbq(Register dst, Register src) {
8636   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8637   emit_arith(0x1B, 0xC0, dst, src);
8638 }
8639 
8640 void Assembler::shlq(Register dst, int imm8) {
8641   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8642   int encode = prefixq_and_encode(dst->encoding());
8643   if (imm8 == 1) {
8644     emit_int8((unsigned char)0xD1);
8645     emit_int8((unsigned char)(0xE0 | encode));
8646   } else {
8647     emit_int8((unsigned char)0xC1);
8648     emit_int8((unsigned char)(0xE0 | encode));
8649     emit_int8(imm8);
8650   }
8651 }
8652 
8653 void Assembler::shlq(Register dst) {
8654   int encode = prefixq_and_encode(dst->encoding());
8655   emit_int8((unsigned char)0xD3);
8656   emit_int8((unsigned char)(0xE0 | encode));
8657 }
8658 
8659 void Assembler::shrq(Register dst, int imm8) {
8660   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8661   int encode = prefixq_and_encode(dst->encoding());
8662   emit_int8((unsigned char)0xC1);
8663   emit_int8((unsigned char)(0xE8 | encode));
8664   emit_int8(imm8);
8665 }
8666 
8667 void Assembler::shrq(Register dst) {
8668   int encode = prefixq_and_encode(dst->encoding());
8669   emit_int8((unsigned char)0xD3);
8670   emit_int8(0xE8 | encode);
8671 }
8672 
8673 void Assembler::subq(Address dst, int32_t imm32) {
8674   InstructionMark im(this);
8675   prefixq(dst);
8676   emit_arith_operand(0x81, rbp, dst, imm32);
8677 }
8678 
8679 void Assembler::subq(Address dst, Register src) {
8680   InstructionMark im(this);
8681   prefixq(dst, src);
8682   emit_int8(0x29);
8683   emit_operand(src, dst);
8684 }
8685 
8686 void Assembler::subq(Register dst, int32_t imm32) {
8687   (void) prefixq_and_encode(dst->encoding());
8688   emit_arith(0x81, 0xE8, dst, imm32);
8689 }
8690 
8691 // Force generation of a 4 byte immediate value even if it fits into 8bit
8692 void Assembler::subq_imm32(Register dst, int32_t imm32) {
8693   (void) prefixq_and_encode(dst->encoding());
8694   emit_arith_imm32(0x81, 0xE8, dst, imm32);
8695 }
8696 
8697 void Assembler::subq(Register dst, Address src) {
8698   InstructionMark im(this);
8699   prefixq(src, dst);
8700   emit_int8(0x2B);
8701   emit_operand(dst, src);
8702 }
8703 
8704 void Assembler::subq(Register dst, Register src) {
8705   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8706   emit_arith(0x2B, 0xC0, dst, src);
8707 }
8708 
8709 void Assembler::testq(Register dst, int32_t imm32) {
8710   // not using emit_arith because test
8711   // doesn't support sign-extension of
8712   // 8bit operands
8713   int encode = dst->encoding();
8714   if (encode == 0) {
8715     prefix(REX_W);
8716     emit_int8((unsigned char)0xA9);
8717   } else {
8718     encode = prefixq_and_encode(encode);
8719     emit_int8((unsigned char)0xF7);
8720     emit_int8((unsigned char)(0xC0 | encode));
8721   }
8722   emit_int32(imm32);
8723 }
8724 
8725 void Assembler::testq(Register dst, Register src) {
8726   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8727   emit_arith(0x85, 0xC0, dst, src);
8728 }
8729 
8730 void Assembler::xaddq(Address dst, Register src) {
8731   InstructionMark im(this);
8732   prefixq(dst, src);
8733   emit_int8(0x0F);
8734   emit_int8((unsigned char)0xC1);
8735   emit_operand(src, dst);
8736 }
8737 
8738 void Assembler::xchgq(Register dst, Address src) {
8739   InstructionMark im(this);
8740   prefixq(src, dst);
8741   emit_int8((unsigned char)0x87);
8742   emit_operand(dst, src);
8743 }
8744 
8745 void Assembler::xchgq(Register dst, Register src) {
8746   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8747   emit_int8((unsigned char)0x87);
8748   emit_int8((unsigned char)(0xc0 | encode));
8749 }
8750 
8751 void Assembler::xorq(Register dst, Register src) {
8752   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8753   emit_arith(0x33, 0xC0, dst, src);
8754 }
8755 
8756 void Assembler::xorq(Register dst, Address src) {
8757   InstructionMark im(this);
8758   prefixq(src, dst);
8759   emit_int8(0x33);
8760   emit_operand(dst, src);
8761 }
8762 
8763 #endif // !LP64