New src/cpu/x86/vm/assembler

   1 /*
   2  * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "gc/shared/cardTableModRefBS.hpp"
  29 #include "gc/shared/collectedHeap.inline.hpp"
  30 #include "interpreter/interpreter.hpp"
  31 #include "memory/resourceArea.hpp"
  32 #include "prims/methodHandles.hpp"
  33 #include "runtime/biasedLocking.hpp"
  34 #include "runtime/interfaceSupport.hpp"
  35 #include "runtime/objectMonitor.hpp"
  36 #include "runtime/os.hpp"
  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/stubRoutines.hpp"
  39 #include "utilities/macros.hpp"
  40 #if INCLUDE_ALL_GCS
  41 #include "gc/g1/g1CollectedHeap.inline.hpp"
  42 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  43 #include "gc/g1/heapRegion.hpp"
  44 #endif // INCLUDE_ALL_GCS
  45 
  46 #ifdef PRODUCT
  47 #define BLOCK_COMMENT(str) /* nothing */
  48 #define STOP(error) stop(error)
  49 #else
  50 #define BLOCK_COMMENT(str) block_comment(str)
  51 #define STOP(error) block_comment(error); stop(error)
  52 #endif
  53 
  54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  55 // Implementation of AddressLiteral
  56 
  57 // A 2-D table for managing compressed displacement(disp8) on EVEX enabled platforms.
  58 unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  59   // -----------------Table 4.5 -------------------- //
  60   16, 32, 64,  // EVEX_FV(0)
  61   4,  4,  4,   // EVEX_FV(1) - with Evex.b
  62   16, 32, 64,  // EVEX_FV(2) - with Evex.w
  63   8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  64   8,  16, 32,  // EVEX_HV(0)
  65   4,  4,  4,   // EVEX_HV(1) - with Evex.b
  66   // -----------------Table 4.6 -------------------- //
  67   16, 32, 64,  // EVEX_FVM(0)
  68   1,  1,  1,   // EVEX_T1S(0)
  69   2,  2,  2,   // EVEX_T1S(1)
  70   4,  4,  4,   // EVEX_T1S(2)
  71   8,  8,  8,   // EVEX_T1S(3)
  72   4,  4,  4,   // EVEX_T1F(0)
  73   8,  8,  8,   // EVEX_T1F(1)
  74   8,  8,  8,   // EVEX_T2(0)
  75   0,  16, 16,  // EVEX_T2(1)
  76   0,  16, 16,  // EVEX_T4(0)
  77   0,  0,  32,  // EVEX_T4(1)
  78   0,  0,  32,  // EVEX_T8(0)
  79   8,  16, 32,  // EVEX_HVM(0)
  80   4,  8,  16,  // EVEX_QVM(0)
  81   2,  4,  8,   // EVEX_OVM(0)
  82   16, 16, 16,  // EVEX_M128(0)
  83   8,  32, 64,  // EVEX_DUP(0)
  84   0,  0,  0    // EVEX_NTUP
  85 };
  86 
  87 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  88   _is_lval = false;
  89   _target = target;
  90   switch (rtype) {
  91   case relocInfo::oop_type:
  92   case relocInfo::metadata_type:
  93     // Oops are a special case. Normally they would be their own section
  94     // but in cases like icBuffer they are literals in the code stream that
  95     // we don't have a section for. We use none so that we get a literal address
  96     // which is always patchable.
  97     break;
  98   case relocInfo::external_word_type:
  99     _rspec = external_word_Relocation::spec(target);
 100     break;
 101   case relocInfo::internal_word_type:
 102     _rspec = internal_word_Relocation::spec(target);
 103     break;
 104   case relocInfo::opt_virtual_call_type:
 105     _rspec = opt_virtual_call_Relocation::spec();
 106     break;
 107   case relocInfo::static_call_type:
 108     _rspec = static_call_Relocation::spec();
 109     break;
 110   case relocInfo::runtime_call_type:
 111     _rspec = runtime_call_Relocation::spec();
 112     break;
 113   case relocInfo::poll_type:
 114   case relocInfo::poll_return_type:
 115     _rspec = Relocation::spec_simple(rtype);
 116     break;
 117   case relocInfo::none:
 118     break;
 119   default:
 120     ShouldNotReachHere();
 121     break;
 122   }
 123 }
 124 
 125 // Implementation of Address
 126 
 127 #ifdef _LP64
 128 
 129 Address Address::make_array(ArrayAddress adr) {
 130   // Not implementable on 64bit machines
 131   // Should have been handled higher up the call chain.
 132   ShouldNotReachHere();
 133   return Address();
 134 }
 135 
 136 // exceedingly dangerous constructor
 137 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
 138   _base  = noreg;
 139   _index = noreg;
 140   _scale = no_scale;
 141   _disp  = disp;
 142   switch (rtype) {
 143     case relocInfo::external_word_type:
 144       _rspec = external_word_Relocation::spec(loc);
 145       break;
 146     case relocInfo::internal_word_type:
 147       _rspec = internal_word_Relocation::spec(loc);
 148       break;
 149     case relocInfo::runtime_call_type:
 150       // HMM
 151       _rspec = runtime_call_Relocation::spec();
 152       break;
 153     case relocInfo::poll_type:
 154     case relocInfo::poll_return_type:
 155       _rspec = Relocation::spec_simple(rtype);
 156       break;
 157     case relocInfo::none:
 158       break;
 159     default:
 160       ShouldNotReachHere();
 161   }
 162 }
 163 #else // LP64
 164 
 165 Address Address::make_array(ArrayAddress adr) {
 166   AddressLiteral base = adr.base();
 167   Address index = adr.index();
 168   assert(index._disp == 0, "must not have disp"); // maybe it can?
 169   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
 170   array._rspec = base._rspec;
 171   return array;
 172 }
 173 
 174 // exceedingly dangerous constructor
 175 Address::Address(address loc, RelocationHolder spec) {
 176   _base  = noreg;
 177   _index = noreg;
 178   _scale = no_scale;
 179   _disp  = (intptr_t) loc;
 180   _rspec = spec;
 181 }
 182 
 183 #endif // _LP64
 184 
 185 
 186 
 187 // Convert the raw encoding form into the form expected by the constructor for
 188 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 189 // that to noreg for the Address constructor.
 190 Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
 191   RelocationHolder rspec;
 192   if (disp_reloc != relocInfo::none) {
 193     rspec = Relocation::spec_simple(disp_reloc);
 194   }
 195   bool valid_index = index != rsp->encoding();
 196   if (valid_index) {
 197     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 198     madr._rspec = rspec;
 199     return madr;
 200   } else {
 201     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 202     madr._rspec = rspec;
 203     return madr;
 204   }
 205 }
 206 
 207 // Implementation of Assembler
 208 
 209 int AbstractAssembler::code_fill_byte() {
 210   return (u_char)'\xF4'; // hlt
 211 }
 212 
 213 // make this go away someday
 214 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
 215   if (rtype == relocInfo::none)
 216     emit_int32(data);
 217   else
 218     emit_data(data, Relocation::spec_simple(rtype), format);
 219 }
 220 
 221 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
 222   assert(imm_operand == 0, "default format must be immediate in this file");
 223   assert(inst_mark() != NULL, "must be inside InstructionMark");
 224   if (rspec.type() !=  relocInfo::none) {
 225     #ifdef ASSERT
 226       check_relocation(rspec, format);
 227     #endif
 228     // Do not use AbstractAssembler::relocate, which is not intended for
 229     // embedded words.  Instead, relocate to the enclosing instruction.
 230 
 231     // hack. call32 is too wide for mask so use disp32
 232     if (format == call32_operand)
 233       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 234     else
 235       code_section()->relocate(inst_mark(), rspec, format);
 236   }
 237   emit_int32(data);
 238 }
 239 
 240 static int encode(Register r) {
 241   int enc = r->encoding();
 242   if (enc >= 8) {
 243     enc -= 8;
 244   }
 245   return enc;
 246 }
 247 
 248 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
 249   assert(dst->has_byte_register(), "must have byte register");
 250   assert(isByte(op1) && isByte(op2), "wrong opcode");
 251   assert(isByte(imm8), "not a byte");
 252   assert((op1 & 0x01) == 0, "should be 8bit operation");
 253   emit_int8(op1);
 254   emit_int8(op2 | encode(dst));
 255   emit_int8(imm8);
 256 }
 257 
 258 
 259 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
 260   assert(isByte(op1) && isByte(op2), "wrong opcode");
 261   assert((op1 & 0x01) == 1, "should be 32bit operation");
 262   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 263   if (is8bit(imm32)) {
 264     emit_int8(op1 | 0x02); // set sign bit
 265     emit_int8(op2 | encode(dst));
 266     emit_int8(imm32 & 0xFF);
 267   } else {
 268     emit_int8(op1);
 269     emit_int8(op2 | encode(dst));
 270     emit_int32(imm32);
 271   }
 272 }
 273 
 274 // Force generation of a 4 byte immediate value even if it fits into 8bit
 275 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
 276   assert(isByte(op1) && isByte(op2), "wrong opcode");
 277   assert((op1 & 0x01) == 1, "should be 32bit operation");
 278   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 279   emit_int8(op1);
 280   emit_int8(op2 | encode(dst));
 281   emit_int32(imm32);
 282 }
 283 
 284 // immediate-to-memory forms
 285 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
 286   assert((op1 & 0x01) == 1, "should be 32bit operation");
 287   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 288   if (is8bit(imm32)) {
 289     emit_int8(op1 | 0x02); // set sign bit
 290     emit_operand(rm, adr, 1);
 291     emit_int8(imm32 & 0xFF);
 292   } else {
 293     emit_int8(op1);
 294     emit_operand(rm, adr, 4);
 295     emit_int32(imm32);
 296   }
 297 }
 298 
 299 
 300 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 301   assert(isByte(op1) && isByte(op2), "wrong opcode");
 302   emit_int8(op1);
 303   emit_int8(op2 | encode(dst) << 3 | encode(src));
 304 }
 305 
 306 
 307 bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
 308                                            int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
 309   int mod_idx = 0;
 310   // We will test if the displacement fits the compressed format and if so
 311   // apply the compression to the displacment iff the result is8bit.
 312   if (VM_Version::supports_evex() && is_evex_inst) {
 313     switch (cur_tuple_type) {
 314     case EVEX_FV:
 315       if ((cur_encoding & VEX_W) == VEX_W) {
 316         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
 317       } else {
 318         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 319       }
 320       break;
 321 
 322     case EVEX_HV:
 323       mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 324       break;
 325 
 326     case EVEX_FVM:
 327       break;
 328 
 329     case EVEX_T1S:
 330       switch (in_size_in_bits) {
 331       case EVEX_8bit:
 332         break;
 333 
 334       case EVEX_16bit:
 335         mod_idx = 1;
 336         break;
 337 
 338       case EVEX_32bit:
 339         mod_idx = 2;
 340         break;
 341 
 342       case EVEX_64bit:
 343         mod_idx = 3;
 344         break;
 345       }
 346       break;
 347 
 348     case EVEX_T1F:
 349     case EVEX_T2:
 350     case EVEX_T4:
 351       mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
 352       break;
 353 
 354     case EVEX_T8:
 355       break;
 356 
 357     case EVEX_HVM:
 358       break;
 359 
 360     case EVEX_QVM:
 361       break;
 362 
 363     case EVEX_OVM:
 364       break;
 365 
 366     case EVEX_M128:
 367       break;
 368 
 369     case EVEX_DUP:
 370       break;
 371 
 372     default:
 373       assert(0, "no valid evex tuple_table entry");
 374       break;
 375     }
 376 
 377     if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
 378       int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
 379       if ((disp % disp_factor) == 0) {
 380         int new_disp = disp / disp_factor;
 381         if ((-0x80 <= new_disp && new_disp < 0x80)) {
 382           disp = new_disp;
 383         }
 384       } else {
 385         return false;
 386       }
 387     }
 388   }
 389   return (-0x80 <= disp && disp < 0x80);
 390 }
 391 
 392 
 393 bool Assembler::emit_compressed_disp_byte(int &disp) {
 394   int mod_idx = 0;
 395   // We will test if the displacement fits the compressed format and if so
 396   // apply the compression to the displacment iff the result is8bit.
 397   if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
 398     int evex_encoding = _attributes->get_evex_encoding();
 399     int tuple_type = _attributes->get_tuple_type();
 400     switch (tuple_type) {
 401     case EVEX_FV:
 402       if ((evex_encoding & VEX_W) == VEX_W) {
 403         mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
 404       } else {
 405         mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 406       }
 407       break;
 408 
 409     case EVEX_HV:
 410       mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
 411       break;
 412 
 413     case EVEX_FVM:
 414       break;
 415 
 416     case EVEX_T1S:
 417       switch (_attributes->get_input_size()) {
 418       case EVEX_8bit:
 419         break;
 420 
 421       case EVEX_16bit:
 422         mod_idx = 1;
 423         break;
 424 
 425       case EVEX_32bit:
 426         mod_idx = 2;
 427         break;
 428 
 429       case EVEX_64bit:
 430         mod_idx = 3;
 431         break;
 432       }
 433       break;
 434 
 435     case EVEX_T1F:
 436     case EVEX_T2:
 437     case EVEX_T4:
 438       mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
 439       break;
 440 
 441     case EVEX_T8:
 442       break;
 443 
 444     case EVEX_HVM:
 445       break;
 446 
 447     case EVEX_QVM:
 448       break;
 449 
 450     case EVEX_OVM:
 451       break;
 452 
 453     case EVEX_M128:
 454       break;
 455 
 456     case EVEX_DUP:
 457       break;
 458 
 459     default:
 460       assert(0, "no valid evex tuple_table entry");
 461       break;
 462     }
 463 
 464     int vector_len = _attributes->get_vector_len();
 465     if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
 466       int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
 467       if ((disp % disp_factor) == 0) {
 468         int new_disp = disp / disp_factor;
 469         if (is8bit(new_disp)) {
 470           disp = new_disp;
 471         }
 472       } else {
 473         return false;
 474       }
 475     }
 476   }
 477   return is8bit(disp);
 478 }
 479 
 480 
 481 void Assembler::emit_operand(Register reg, Register base, Register index,
 482                              Address::ScaleFactor scale, int disp,
 483                              RelocationHolder const& rspec,
 484                              int rip_relative_correction) {
 485   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
 486 
 487   // Encode the registers as needed in the fields they are used in
 488 
 489   int regenc = encode(reg) << 3;
 490   int indexenc = index->is_valid() ? encode(index) << 3 : 0;
 491   int baseenc = base->is_valid() ? encode(base) : 0;
 492 
 493   if (base->is_valid()) {
 494     if (index->is_valid()) {
 495       assert(scale != Address::no_scale, "inconsistent address");
 496       // [base + index*scale + disp]
 497       if (disp == 0 && rtype == relocInfo::none  &&
 498           base != rbp LP64_ONLY(&& base != r13)) {
 499         // [base + index*scale]
 500         // [00 reg 100][ss index base]
 501         assert(index != rsp, "illegal addressing mode");
 502         emit_int8(0x04 | regenc);
 503         emit_int8(scale << 6 | indexenc | baseenc);
 504       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 505         // [base + index*scale + imm8]
 506         // [01 reg 100][ss index base] imm8
 507         assert(index != rsp, "illegal addressing mode");
 508         emit_int8(0x44 | regenc);
 509         emit_int8(scale << 6 | indexenc | baseenc);
 510         emit_int8(disp & 0xFF);
 511       } else {
 512         // [base + index*scale + disp32]
 513         // [10 reg 100][ss index base] disp32
 514         assert(index != rsp, "illegal addressing mode");
 515         emit_int8(0x84 | regenc);
 516         emit_int8(scale << 6 | indexenc | baseenc);
 517         emit_data(disp, rspec, disp32_operand);
 518       }
 519     } else if (base == rsp LP64_ONLY(|| base == r12)) {
 520       // [rsp + disp]
 521       if (disp == 0 && rtype == relocInfo::none) {
 522         // [rsp]
 523         // [00 reg 100][00 100 100]
 524         emit_int8(0x04 | regenc);
 525         emit_int8(0x24);
 526       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 527         // [rsp + imm8]
 528         // [01 reg 100][00 100 100] disp8
 529         emit_int8(0x44 | regenc);
 530         emit_int8(0x24);
 531         emit_int8(disp & 0xFF);
 532       } else {
 533         // [rsp + imm32]
 534         // [10 reg 100][00 100 100] disp32
 535         emit_int8(0x84 | regenc);
 536         emit_int8(0x24);
 537         emit_data(disp, rspec, disp32_operand);
 538       }
 539     } else {
 540       // [base + disp]
 541       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
 542       if (disp == 0 && rtype == relocInfo::none &&
 543           base != rbp LP64_ONLY(&& base != r13)) {
 544         // [base]
 545         // [00 reg base]
 546         emit_int8(0x00 | regenc | baseenc);
 547       } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
 548         // [base + disp8]
 549         // [01 reg base] disp8
 550         emit_int8(0x40 | regenc | baseenc);
 551         emit_int8(disp & 0xFF);
 552       } else {
 553         // [base + disp32]
 554         // [10 reg base] disp32
 555         emit_int8(0x80 | regenc | baseenc);
 556         emit_data(disp, rspec, disp32_operand);
 557       }
 558     }
 559   } else {
 560     if (index->is_valid()) {
 561       assert(scale != Address::no_scale, "inconsistent address");
 562       // [index*scale + disp]
 563       // [00 reg 100][ss index 101] disp32
 564       assert(index != rsp, "illegal addressing mode");
 565       emit_int8(0x04 | regenc);
 566       emit_int8(scale << 6 | indexenc | 0x05);
 567       emit_data(disp, rspec, disp32_operand);
 568     } else if (rtype != relocInfo::none ) {
 569       // [disp] (64bit) RIP-RELATIVE (32bit) abs
 570       // [00 000 101] disp32
 571 
 572       emit_int8(0x05 | regenc);
 573       // Note that the RIP-rel. correction applies to the generated
 574       // disp field, but _not_ to the target address in the rspec.
 575 
 576       // disp was created by converting the target address minus the pc
 577       // at the start of the instruction. That needs more correction here.
 578       // intptr_t disp = target - next_ip;
 579       assert(inst_mark() != NULL, "must be inside InstructionMark");
 580       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
 581       int64_t adjusted = disp;
 582       // Do rip-rel adjustment for 64bit
 583       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
 584       assert(is_simm32(adjusted),
 585              "must be 32bit offset (RIP relative address)");
 586       emit_data((int32_t) adjusted, rspec, disp32_operand);
 587 
 588     } else {
 589       // 32bit never did this, did everything as the rip-rel/disp code above
 590       // [disp] ABSOLUTE
 591       // [00 reg 100][00 100 101] disp32
 592       emit_int8(0x04 | regenc);
 593       emit_int8(0x25);
 594       emit_data(disp, rspec, disp32_operand);
 595     }
 596   }
 597 }
 598 
 599 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 600                              Address::ScaleFactor scale, int disp,
 601                              RelocationHolder const& rspec) {
 602   if (UseAVX > 2) {
 603     int xreg_enc = reg->encoding();
 604     if (xreg_enc > 15) {
 605       XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
 606       emit_operand((Register)new_reg, base, index, scale, disp, rspec);
 607       return;
 608     }
 609   }
 610   emit_operand((Register)reg, base, index, scale, disp, rspec);
 611 }
 612 
 613 // Secret local extension to Assembler::WhichOperand:
 614 #define end_pc_operand (_WhichOperand_limit)
 615 
 616 address Assembler::locate_operand(address inst, WhichOperand which) {
 617   // Decode the given instruction, and return the address of
 618   // an embedded 32-bit operand word.
 619 
 620   // If "which" is disp32_operand, selects the displacement portion
 621   // of an effective address specifier.
 622   // If "which" is imm64_operand, selects the trailing immediate constant.
 623   // If "which" is call32_operand, selects the displacement of a call or jump.
 624   // Caller is responsible for ensuring that there is such an operand,
 625   // and that it is 32/64 bits wide.
 626 
 627   // If "which" is end_pc_operand, find the end of the instruction.
 628 
 629   address ip = inst;
 630   bool is_64bit = false;
 631 
 632   debug_only(bool has_disp32 = false);
 633   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
 634 
 635   again_after_prefix:
 636   switch (0xFF & *ip++) {
 637 
 638   // These convenience macros generate groups of "case" labels for the switch.
 639 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
 640 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
 641              case (x)+4: case (x)+5: case (x)+6: case (x)+7
 642 #define REP16(x) REP8((x)+0): \
 643               case REP8((x)+8)
 644 
 645   case CS_segment:
 646   case SS_segment:
 647   case DS_segment:
 648   case ES_segment:
 649   case FS_segment:
 650   case GS_segment:
 651     // Seems dubious
 652     LP64_ONLY(assert(false, "shouldn't have that prefix"));
 653     assert(ip == inst+1, "only one prefix allowed");
 654     goto again_after_prefix;
 655 
 656   case 0x67:
 657   case REX:
 658   case REX_B:
 659   case REX_X:
 660   case REX_XB:
 661   case REX_R:
 662   case REX_RB:
 663   case REX_RX:
 664   case REX_RXB:
 665     NOT_LP64(assert(false, "64bit prefixes"));
 666     goto again_after_prefix;
 667 
 668   case REX_W:
 669   case REX_WB:
 670   case REX_WX:
 671   case REX_WXB:
 672   case REX_WR:
 673   case REX_WRB:
 674   case REX_WRX:
 675   case REX_WRXB:
 676     NOT_LP64(assert(false, "64bit prefixes"));
 677     is_64bit = true;
 678     goto again_after_prefix;
 679 
 680   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
 681   case 0x88: // movb a, r
 682   case 0x89: // movl a, r
 683   case 0x8A: // movb r, a
 684   case 0x8B: // movl r, a
 685   case 0x8F: // popl a
 686     debug_only(has_disp32 = true);
 687     break;
 688 
 689   case 0x68: // pushq #32
 690     if (which == end_pc_operand) {
 691       return ip + 4;
 692     }
 693     assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
 694     return ip;                  // not produced by emit_operand
 695 
 696   case 0x66: // movw ... (size prefix)
 697     again_after_size_prefix2:
 698     switch (0xFF & *ip++) {
 699     case REX:
 700     case REX_B:
 701     case REX_X:
 702     case REX_XB:
 703     case REX_R:
 704     case REX_RB:
 705     case REX_RX:
 706     case REX_RXB:
 707     case REX_W:
 708     case REX_WB:
 709     case REX_WX:
 710     case REX_WXB:
 711     case REX_WR:
 712     case REX_WRB:
 713     case REX_WRX:
 714     case REX_WRXB:
 715       NOT_LP64(assert(false, "64bit prefix found"));
 716       goto again_after_size_prefix2;
 717     case 0x8B: // movw r, a
 718     case 0x89: // movw a, r
 719       debug_only(has_disp32 = true);
 720       break;
 721     case 0xC7: // movw a, #16
 722       debug_only(has_disp32 = true);
 723       tail_size = 2;  // the imm16
 724       break;
 725     case 0x0F: // several SSE/SSE2 variants
 726       ip--;    // reparse the 0x0F
 727       goto again_after_prefix;
 728     default:
 729       ShouldNotReachHere();
 730     }
 731     break;
 732 
 733   case REP8(0xB8): // movl/q r, #32/#64(oop?)
 734     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
 735     // these asserts are somewhat nonsensical
 736 #ifndef _LP64
 737     assert(which == imm_operand || which == disp32_operand,
 738            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 739 #else
 740     assert((which == call32_operand || which == imm_operand) && is_64bit ||
 741            which == narrow_oop_operand && !is_64bit,
 742            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 743 #endif // _LP64
 744     return ip;
 745 
 746   case 0x69: // imul r, a, #32
 747   case 0xC7: // movl a, #32(oop?)
 748     tail_size = 4;
 749     debug_only(has_disp32 = true); // has both kinds of operands!
 750     break;
 751 
 752   case 0x0F: // movx..., etc.
 753     switch (0xFF & *ip++) {
 754     case 0x3A: // pcmpestri
 755       tail_size = 1;
 756     case 0x38: // ptest, pmovzxbw
 757       ip++; // skip opcode
 758       debug_only(has_disp32 = true); // has both kinds of operands!
 759       break;
 760 
 761     case 0x70: // pshufd r, r/a, #8
 762       debug_only(has_disp32 = true); // has both kinds of operands!
 763     case 0x73: // psrldq r, #8
 764       tail_size = 1;
 765       break;
 766 
 767     case 0x12: // movlps
 768     case 0x28: // movaps
 769     case 0x2E: // ucomiss
 770     case 0x2F: // comiss
 771     case 0x54: // andps
 772     case 0x55: // andnps
 773     case 0x56: // orps
 774     case 0x57: // xorps
 775     case 0x58: // addpd
 776     case 0x59: // mulpd
 777     case 0x6E: // movd
 778     case 0x7E: // movd
 779     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
 780     case 0xFE: // paddd
 781       debug_only(has_disp32 = true);
 782       break;
 783 
 784     case 0xAD: // shrd r, a, %cl
 785     case 0xAF: // imul r, a
 786     case 0xBE: // movsbl r, a (movsxb)
 787     case 0xBF: // movswl r, a (movsxw)
 788     case 0xB6: // movzbl r, a (movzxb)
 789     case 0xB7: // movzwl r, a (movzxw)
 790     case REP16(0x40): // cmovl cc, r, a
 791     case 0xB0: // cmpxchgb
 792     case 0xB1: // cmpxchg
 793     case 0xC1: // xaddl
 794     case 0xC7: // cmpxchg8
 795     case REP16(0x90): // setcc a
 796       debug_only(has_disp32 = true);
 797       // fall out of the switch to decode the address
 798       break;
 799 
 800     case 0xC4: // pinsrw r, a, #8
 801       debug_only(has_disp32 = true);
 802     case 0xC5: // pextrw r, r, #8
 803       tail_size = 1;  // the imm8
 804       break;
 805 
 806     case 0xAC: // shrd r, a, #8
 807       debug_only(has_disp32 = true);
 808       tail_size = 1;  // the imm8
 809       break;
 810 
 811     case REP16(0x80): // jcc rdisp32
 812       if (which == end_pc_operand)  return ip + 4;
 813       assert(which == call32_operand, "jcc has no disp32 or imm");
 814       return ip;
 815     default:
 816       ShouldNotReachHere();
 817     }
 818     break;
 819 
 820   case 0x81: // addl a, #32; addl r, #32
 821     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 822     // on 32bit in the case of cmpl, the imm might be an oop
 823     tail_size = 4;
 824     debug_only(has_disp32 = true); // has both kinds of operands!
 825     break;
 826 
 827   case 0x83: // addl a, #8; addl r, #8
 828     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 829     debug_only(has_disp32 = true); // has both kinds of operands!
 830     tail_size = 1;
 831     break;
 832 
 833   case 0x9B:
 834     switch (0xFF & *ip++) {
 835     case 0xD9: // fnstcw a
 836       debug_only(has_disp32 = true);
 837       break;
 838     default:
 839       ShouldNotReachHere();
 840     }
 841     break;
 842 
 843   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
 844   case REP4(0x10): // adc...
 845   case REP4(0x20): // and...
 846   case REP4(0x30): // xor...
 847   case REP4(0x08): // or...
 848   case REP4(0x18): // sbb...
 849   case REP4(0x28): // sub...
 850   case 0xF7: // mull a
 851   case 0x8D: // lea r, a
 852   case 0x87: // xchg r, a
 853   case REP4(0x38): // cmp...
 854   case 0x85: // test r, a
 855     debug_only(has_disp32 = true); // has both kinds of operands!
 856     break;
 857 
 858   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
 859   case 0xC6: // movb a, #8
 860   case 0x80: // cmpb a, #8
 861   case 0x6B: // imul r, a, #8
 862     debug_only(has_disp32 = true); // has both kinds of operands!
 863     tail_size = 1; // the imm8
 864     break;
 865 
 866   case 0xC4: // VEX_3bytes
 867   case 0xC5: // VEX_2bytes
 868     assert((UseAVX > 0), "shouldn't have VEX prefix");
 869     assert(ip == inst+1, "no prefixes allowed");
 870     // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
 871     // but they have prefix 0x0F and processed when 0x0F processed above.
 872     //
 873     // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
 874     // instructions (these instructions are not supported in 64-bit mode).
 875     // To distinguish them bits [7:6] are set in the VEX second byte since
 876     // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
 877     // those VEX bits REX and vvvv bits are inverted.
 878     //
 879     // Fortunately C2 doesn't generate these instructions so we don't need
 880     // to check for them in product version.
 881 
 882     // Check second byte
 883     NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
 884 
 885     int vex_opcode;
 886     // First byte
 887     if ((0xFF & *inst) == VEX_3bytes) {
 888       vex_opcode = VEX_OPCODE_MASK & *ip;
 889       ip++; // third byte
 890       is_64bit = ((VEX_W & *ip) == VEX_W);
 891     } else {
 892       vex_opcode = VEX_OPCODE_0F;
 893     }
 894     ip++; // opcode
 895     // To find the end of instruction (which == end_pc_operand).
 896     switch (vex_opcode) {
 897       case VEX_OPCODE_0F:
 898         switch (0xFF & *ip) {
 899         case 0x70: // pshufd r, r/a, #8
 900         case 0x71: // ps[rl|ra|ll]w r, #8
 901         case 0x72: // ps[rl|ra|ll]d r, #8
 902         case 0x73: // ps[rl|ra|ll]q r, #8
 903         case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
 904         case 0xC4: // pinsrw r, r, r/a, #8
 905         case 0xC5: // pextrw r/a, r, #8
 906         case 0xC6: // shufp[s|d] r, r, r/a, #8
 907           tail_size = 1;  // the imm8
 908           break;
 909         }
 910         break;
 911       case VEX_OPCODE_0F_3A:
 912         tail_size = 1;
 913         break;
 914     }
 915     ip++; // skip opcode
 916     debug_only(has_disp32 = true); // has both kinds of operands!
 917     break;
 918 
 919   case 0x62: // EVEX_4bytes
 920     assert((UseAVX > 0), "shouldn't have EVEX prefix");
 921     assert(ip == inst+1, "no prefixes allowed");
 922     // no EVEX collisions, all instructions that have 0x62 opcodes
 923     // have EVEX versions and are subopcodes of 0x66
 924     ip++; // skip P0 and exmaine W in P1
 925     is_64bit = ((VEX_W & *ip) == VEX_W);
 926     ip++; // move to P2
 927     ip++; // skip P2, move to opcode
 928     // To find the end of instruction (which == end_pc_operand).
 929     switch (0xFF & *ip) {
 930     case 0x22: // pinsrd r, r/a, #8
 931     case 0x61: // pcmpestri r, r/a, #8
 932     case 0x70: // pshufd r, r/a, #8
 933     case 0x73: // psrldq r, #8
 934       tail_size = 1;  // the imm8
 935       break;
 936     default:
 937       break;
 938     }
 939     ip++; // skip opcode
 940     debug_only(has_disp32 = true); // has both kinds of operands!
 941     break;
 942 
 943   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 944   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 945   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 946   case 0xDD: // fld_d a; fst_d a; fstp_d a
 947   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 948   case 0xDF: // fild_d a; fistp_d a
 949   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 950   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 951   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 952     debug_only(has_disp32 = true);
 953     break;
 954 
 955   case 0xE8: // call rdisp32
 956   case 0xE9: // jmp  rdisp32
 957     if (which == end_pc_operand)  return ip + 4;
 958     assert(which == call32_operand, "call has no disp32 or imm");
 959     return ip;
 960 
 961   case 0xF0:                    // Lock
 962     assert(os::is_MP(), "only on MP");
 963     goto again_after_prefix;
 964 
 965   case 0xF3:                    // For SSE
 966   case 0xF2:                    // For SSE2
 967     switch (0xFF & *ip++) {
 968     case REX:
 969     case REX_B:
 970     case REX_X:
 971     case REX_XB:
 972     case REX_R:
 973     case REX_RB:
 974     case REX_RX:
 975     case REX_RXB:
 976     case REX_W:
 977     case REX_WB:
 978     case REX_WX:
 979     case REX_WXB:
 980     case REX_WR:
 981     case REX_WRB:
 982     case REX_WRX:
 983     case REX_WRXB:
 984       NOT_LP64(assert(false, "found 64bit prefix"));
 985       ip++;
 986     default:
 987       ip++;
 988     }
 989     debug_only(has_disp32 = true); // has both kinds of operands!
 990     break;
 991 
 992   default:
 993     ShouldNotReachHere();
 994 
 995 #undef REP8
 996 #undef REP16
 997   }
 998 
 999   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
1000 #ifdef _LP64
1001   assert(which != imm_operand, "instruction is not a movq reg, imm64");
1002 #else
1003   // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
1004   assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
1005 #endif // LP64
1006   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
1007 
1008   // parse the output of emit_operand
1009   int op2 = 0xFF & *ip++;
1010   int base = op2 & 0x07;
1011   int op3 = -1;
1012   const int b100 = 4;
1013   const int b101 = 5;
1014   if (base == b100 && (op2 >> 6) != 3) {
1015     op3 = 0xFF & *ip++;
1016     base = op3 & 0x07;   // refetch the base
1017   }
1018   // now ip points at the disp (if any)
1019 
1020   switch (op2 >> 6) {
1021   case 0:
1022     // [00 reg  100][ss index base]
1023     // [00 reg  100][00   100  esp]
1024     // [00 reg base]
1025     // [00 reg  100][ss index  101][disp32]
1026     // [00 reg  101]               [disp32]
1027 
1028     if (base == b101) {
1029       if (which == disp32_operand)
1030         return ip;              // caller wants the disp32
1031       ip += 4;                  // skip the disp32
1032     }
1033     break;
1034 
1035   case 1:
1036     // [01 reg  100][ss index base][disp8]
1037     // [01 reg  100][00   100  esp][disp8]
1038     // [01 reg base]               [disp8]
1039     ip += 1;                    // skip the disp8
1040     break;
1041 
1042   case 2:
1043     // [10 reg  100][ss index base][disp32]
1044     // [10 reg  100][00   100  esp][disp32]
1045     // [10 reg base]               [disp32]
1046     if (which == disp32_operand)
1047       return ip;                // caller wants the disp32
1048     ip += 4;                    // skip the disp32
1049     break;
1050 
1051   case 3:
1052     // [11 reg base]  (not a memory addressing mode)
1053     break;
1054   }
1055 
1056   if (which == end_pc_operand) {
1057     return ip + tail_size;
1058   }
1059 
1060 #ifdef _LP64
1061   assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
1062 #else
1063   assert(which == imm_operand, "instruction has only an imm field");
1064 #endif // LP64
1065   return ip;
1066 }
1067 
1068 address Assembler::locate_next_instruction(address inst) {
1069   // Secretly share code with locate_operand:
1070   return locate_operand(inst, end_pc_operand);
1071 }
1072 
1073 
1074 #ifdef ASSERT
1075 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
1076   address inst = inst_mark();
1077   assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
1078   address opnd;
1079 
1080   Relocation* r = rspec.reloc();
1081   if (r->type() == relocInfo::none) {
1082     return;
1083   } else if (r->is_call() || format == call32_operand) {
1084     // assert(format == imm32_operand, "cannot specify a nonzero format");
1085     opnd = locate_operand(inst, call32_operand);
1086   } else if (r->is_data()) {
1087     assert(format == imm_operand || format == disp32_operand
1088            LP64_ONLY(|| format == narrow_oop_operand), "format ok");
1089     opnd = locate_operand(inst, (WhichOperand)format);
1090   } else {
1091     assert(format == imm_operand, "cannot specify a format");
1092     return;
1093   }
1094   assert(opnd == pc(), "must put operand where relocs can find it");
1095 }
1096 #endif // ASSERT
1097 
1098 void Assembler::emit_operand32(Register reg, Address adr) {
1099   assert(reg->encoding() < 8, "no extended registers");
1100   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1101   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1102                adr._rspec);
1103 }
1104 
1105 void Assembler::emit_operand(Register reg, Address adr,
1106                              int rip_relative_correction) {
1107   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1108                adr._rspec,
1109                rip_relative_correction);
1110 }
1111 
1112 void Assembler::emit_operand(XMMRegister reg, Address adr) {
1113   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
1114                adr._rspec);
1115 }
1116 
1117 // MMX operations
1118 void Assembler::emit_operand(MMXRegister reg, Address adr) {
1119   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1120   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
1121 }
1122 
1123 // work around gcc (3.2.1-7a) bug
1124 void Assembler::emit_operand(Address adr, MMXRegister reg) {
1125   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
1126   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
1127 }
1128 
1129 
1130 void Assembler::emit_farith(int b1, int b2, int i) {
1131   assert(isByte(b1) && isByte(b2), "wrong opcode");
1132   assert(0 <= i &&  i < 8, "illegal stack offset");
1133   emit_int8(b1);
1134   emit_int8(b2 + i);
1135 }
1136 
1137 
1138 // Now the Assembler instructions (identical for 32/64 bits)
1139 
1140 void Assembler::adcl(Address dst, int32_t imm32) {
1141   InstructionMark im(this);
1142   prefix(dst);
1143   emit_arith_operand(0x81, rdx, dst, imm32);
1144 }
1145 
1146 void Assembler::adcl(Address dst, Register src) {
1147   InstructionMark im(this);
1148   prefix(dst, src);
1149   emit_int8(0x11);
1150   emit_operand(src, dst);
1151 }
1152 
1153 void Assembler::adcl(Register dst, int32_t imm32) {
1154   prefix(dst);
1155   emit_arith(0x81, 0xD0, dst, imm32);
1156 }
1157 
1158 void Assembler::adcl(Register dst, Address src) {
1159   InstructionMark im(this);
1160   prefix(src, dst);
1161   emit_int8(0x13);
1162   emit_operand(dst, src);
1163 }
1164 
1165 void Assembler::adcl(Register dst, Register src) {
1166   (void) prefix_and_encode(dst->encoding(), src->encoding());
1167   emit_arith(0x13, 0xC0, dst, src);
1168 }
1169 
1170 void Assembler::addl(Address dst, int32_t imm32) {
1171   InstructionMark im(this);
1172   prefix(dst);
1173   emit_arith_operand(0x81, rax, dst, imm32);
1174 }
1175 
1176 void Assembler::addb(Address dst, int imm8) {
1177   InstructionMark im(this);
1178   prefix(dst);
1179   emit_int8((unsigned char)0x80);
1180   emit_operand(rax, dst, 1);
1181   emit_int8(imm8);
1182 }
1183 
1184 void Assembler::addw(Address dst, int imm16) {
1185   InstructionMark im(this);
1186   emit_int8(0x66);
1187   prefix(dst);
1188   emit_int8((unsigned char)0x81);
1189   emit_operand(rax, dst, 2);
1190   emit_int16(imm16);
1191 }
1192 
1193 void Assembler::addl(Address dst, Register src) {
1194   InstructionMark im(this);
1195   prefix(dst, src);
1196   emit_int8(0x01);
1197   emit_operand(src, dst);
1198 }
1199 
1200 void Assembler::addl(Register dst, int32_t imm32) {
1201   prefix(dst);
1202   emit_arith(0x81, 0xC0, dst, imm32);
1203 }
1204 
1205 void Assembler::addl(Register dst, Address src) {
1206   InstructionMark im(this);
1207   prefix(src, dst);
1208   emit_int8(0x03);
1209   emit_operand(dst, src);
1210 }
1211 
1212 void Assembler::addl(Register dst, Register src) {
1213   (void) prefix_and_encode(dst->encoding(), src->encoding());
1214   emit_arith(0x03, 0xC0, dst, src);
1215 }
1216 
1217 void Assembler::addr_nop_4() {
1218   assert(UseAddressNop, "no CPU support");
1219   // 4 bytes: NOP DWORD PTR [EAX+0]
1220   emit_int8(0x0F);
1221   emit_int8(0x1F);
1222   emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
1223   emit_int8(0);    // 8-bits offset (1 byte)
1224 }
1225 
1226 void Assembler::addr_nop_5() {
1227   assert(UseAddressNop, "no CPU support");
1228   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
1229   emit_int8(0x0F);
1230   emit_int8(0x1F);
1231   emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
1232   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1233   emit_int8(0);    // 8-bits offset (1 byte)
1234 }
1235 
1236 void Assembler::addr_nop_7() {
1237   assert(UseAddressNop, "no CPU support");
1238   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
1239   emit_int8(0x0F);
1240   emit_int8(0x1F);
1241   emit_int8((unsigned char)0x80);
1242                    // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
1243   emit_int32(0);   // 32-bits offset (4 bytes)
1244 }
1245 
1246 void Assembler::addr_nop_8() {
1247   assert(UseAddressNop, "no CPU support");
1248   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
1249   emit_int8(0x0F);
1250   emit_int8(0x1F);
1251   emit_int8((unsigned char)0x84);
1252                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
1253   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
1254   emit_int32(0);   // 32-bits offset (4 bytes)
1255 }
1256 
1257 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
1258   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1259   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1260   attributes.set_rex_vex_w_reverted();
1261   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1262   emit_int8(0x58);
1263   emit_int8((unsigned char)(0xC0 | encode));
1264 }
1265 
1266 void Assembler::addsd(XMMRegister dst, Address src) {
1267   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1268   InstructionMark im(this);
1269   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1270   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1271   attributes.set_rex_vex_w_reverted();
1272   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1273   emit_int8(0x58);
1274   emit_operand(dst, src);
1275 }
1276 
1277 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1278   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1279   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1280   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1281   emit_int8(0x58);
1282   emit_int8((unsigned char)(0xC0 | encode));
1283 }
1284 
1285 void Assembler::addss(XMMRegister dst, Address src) {
1286   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1287   InstructionMark im(this);
1288   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1289   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1290   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1291   emit_int8(0x58);
1292   emit_operand(dst, src);
1293 }
1294 
1295 void Assembler::aesdec(XMMRegister dst, Address src) {
1296   assert(VM_Version::supports_aes(), "");
1297   InstructionMark im(this);
1298   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1299   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1300   emit_int8((unsigned char)0xDE);
1301   emit_operand(dst, src);
1302 }
1303 
1304 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1305   assert(VM_Version::supports_aes(), "");
1306   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1307   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1308   emit_int8((unsigned char)0xDE);
1309   emit_int8(0xC0 | encode);
1310 }
1311 
1312 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1313   assert(VM_Version::supports_aes(), "");
1314   InstructionMark im(this);
1315   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1316   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1317   emit_int8((unsigned char)0xDF);
1318   emit_operand(dst, src);
1319 }
1320 
1321 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1322   assert(VM_Version::supports_aes(), "");
1323   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1324   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1325   emit_int8((unsigned char)0xDF);
1326   emit_int8((unsigned char)(0xC0 | encode));
1327 }
1328 
1329 void Assembler::aesenc(XMMRegister dst, Address src) {
1330   assert(VM_Version::supports_aes(), "");
1331   InstructionMark im(this);
1332   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1333   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1334   emit_int8((unsigned char)0xDC);
1335   emit_operand(dst, src);
1336 }
1337 
1338 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1339   assert(VM_Version::supports_aes(), "");
1340   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1341   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1342   emit_int8((unsigned char)0xDC);
1343   emit_int8(0xC0 | encode);
1344 }
1345 
1346 void Assembler::aesenclast(XMMRegister dst, Address src) {
1347   assert(VM_Version::supports_aes(), "");
1348   InstructionMark im(this);
1349   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1350   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1351   emit_int8((unsigned char)0xDD);
1352   emit_operand(dst, src);
1353 }
1354 
1355 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1356   assert(VM_Version::supports_aes(), "");
1357   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1358   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1359   emit_int8((unsigned char)0xDD);
1360   emit_int8((unsigned char)(0xC0 | encode));
1361 }
1362 
1363 void Assembler::andl(Address dst, int32_t imm32) {
1364   InstructionMark im(this);
1365   prefix(dst);
1366   emit_int8((unsigned char)0x81);
1367   emit_operand(rsp, dst, 4);
1368   emit_int32(imm32);
1369 }
1370 
1371 void Assembler::andl(Register dst, int32_t imm32) {
1372   prefix(dst);
1373   emit_arith(0x81, 0xE0, dst, imm32);
1374 }
1375 
1376 void Assembler::andl(Register dst, Address src) {
1377   InstructionMark im(this);
1378   prefix(src, dst);
1379   emit_int8(0x23);
1380   emit_operand(dst, src);
1381 }
1382 
1383 void Assembler::andl(Register dst, Register src) {
1384   (void) prefix_and_encode(dst->encoding(), src->encoding());
1385   emit_arith(0x23, 0xC0, dst, src);
1386 }
1387 
1388 void Assembler::andnl(Register dst, Register src1, Register src2) {
1389   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1390   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1391   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1392   emit_int8((unsigned char)0xF2);
1393   emit_int8((unsigned char)(0xC0 | encode));
1394 }
1395 
1396 void Assembler::andnl(Register dst, Register src1, Address src2) {
1397   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1398   InstructionMark im(this);
1399   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1400   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1401   emit_int8((unsigned char)0xF2);
1402   emit_operand(dst, src2);
1403 }
1404 
1405 void Assembler::bsfl(Register dst, Register src) {
1406   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1407   emit_int8(0x0F);
1408   emit_int8((unsigned char)0xBC);
1409   emit_int8((unsigned char)(0xC0 | encode));
1410 }
1411 
1412 void Assembler::bsrl(Register dst, Register src) {
1413   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1414   emit_int8(0x0F);
1415   emit_int8((unsigned char)0xBD);
1416   emit_int8((unsigned char)(0xC0 | encode));
1417 }
1418 
1419 void Assembler::bswapl(Register reg) { // bswap
1420   int encode = prefix_and_encode(reg->encoding());
1421   emit_int8(0x0F);
1422   emit_int8((unsigned char)(0xC8 | encode));
1423 }
1424 
1425 void Assembler::blsil(Register dst, Register src) {
1426   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1427   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1428   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1429   emit_int8((unsigned char)0xF3);
1430   emit_int8((unsigned char)(0xC0 | encode));
1431 }
1432 
1433 void Assembler::blsil(Register dst, Address src) {
1434   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1435   InstructionMark im(this);
1436   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1437   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1438   emit_int8((unsigned char)0xF3);
1439   emit_operand(rbx, src);
1440 }
1441 
1442 void Assembler::blsmskl(Register dst, Register src) {
1443   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1444   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1445   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1446   emit_int8((unsigned char)0xF3);
1447   emit_int8((unsigned char)(0xC0 | encode));
1448 }
1449 
1450 void Assembler::blsmskl(Register dst, Address src) {
1451   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1452   InstructionMark im(this);
1453   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1454   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1455   emit_int8((unsigned char)0xF3);
1456   emit_operand(rdx, src);
1457 }
1458 
1459 void Assembler::blsrl(Register dst, Register src) {
1460   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1461   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1462   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1463   emit_int8((unsigned char)0xF3);
1464   emit_int8((unsigned char)(0xC0 | encode));
1465 }
1466 
1467 void Assembler::blsrl(Register dst, Address src) {
1468   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1469   InstructionMark im(this);
1470   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
1471   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1472   emit_int8((unsigned char)0xF3);
1473   emit_operand(rcx, src);
1474 }
1475 
1476 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1477   // suspect disp32 is always good
1478   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1479 
1480   if (L.is_bound()) {
1481     const int long_size = 5;
1482     int offs = (int)( target(L) - pc() );
1483     assert(offs <= 0, "assembler error");
1484     InstructionMark im(this);
1485     // 1110 1000 #32-bit disp
1486     emit_int8((unsigned char)0xE8);
1487     emit_data(offs - long_size, rtype, operand);
1488   } else {
1489     InstructionMark im(this);
1490     // 1110 1000 #32-bit disp
1491     L.add_patch_at(code(), locator());
1492 
1493     emit_int8((unsigned char)0xE8);
1494     emit_data(int(0), rtype, operand);
1495   }
1496 }
1497 
1498 void Assembler::call(Register dst) {
1499   int encode = prefix_and_encode(dst->encoding());
1500   emit_int8((unsigned char)0xFF);
1501   emit_int8((unsigned char)(0xD0 | encode));
1502 }
1503 
1504 
1505 void Assembler::call(Address adr) {
1506   InstructionMark im(this);
1507   prefix(adr);
1508   emit_int8((unsigned char)0xFF);
1509   emit_operand(rdx, adr);
1510 }
1511 
1512 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1513   assert(entry != NULL, "call most probably wrong");
1514   InstructionMark im(this);
1515   emit_int8((unsigned char)0xE8);
1516   intptr_t disp = entry - (pc() + sizeof(int32_t));
1517   assert(is_simm32(disp), "must be 32bit offset (call2)");
1518   // Technically, should use call32_operand, but this format is
1519   // implied by the fact that we're emitting a call instruction.
1520 
1521   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1522   emit_data((int) disp, rspec, operand);
1523 }
1524 
1525 void Assembler::cdql() {
1526   emit_int8((unsigned char)0x99);
1527 }
1528 
1529 void Assembler::cld() {
1530   emit_int8((unsigned char)0xFC);
1531 }
1532 
1533 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1534   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1535   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1536   emit_int8(0x0F);
1537   emit_int8(0x40 | cc);
1538   emit_int8((unsigned char)(0xC0 | encode));
1539 }
1540 
1541 
1542 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1543   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1544   prefix(src, dst);
1545   emit_int8(0x0F);
1546   emit_int8(0x40 | cc);
1547   emit_operand(dst, src);
1548 }
1549 
1550 void Assembler::cmpb(Address dst, int imm8) {
1551   InstructionMark im(this);
1552   prefix(dst);
1553   emit_int8((unsigned char)0x80);
1554   emit_operand(rdi, dst, 1);
1555   emit_int8(imm8);
1556 }
1557 
1558 void Assembler::cmpl(Address dst, int32_t imm32) {
1559   InstructionMark im(this);
1560   prefix(dst);
1561   emit_int8((unsigned char)0x81);
1562   emit_operand(rdi, dst, 4);
1563   emit_int32(imm32);
1564 }
1565 
1566 void Assembler::cmpl(Register dst, int32_t imm32) {
1567   prefix(dst);
1568   emit_arith(0x81, 0xF8, dst, imm32);
1569 }
1570 
1571 void Assembler::cmpl(Register dst, Register src) {
1572   (void) prefix_and_encode(dst->encoding(), src->encoding());
1573   emit_arith(0x3B, 0xC0, dst, src);
1574 }
1575 
1576 void Assembler::cmpl(Register dst, Address  src) {
1577   InstructionMark im(this);
1578   prefix(src, dst);
1579   emit_int8((unsigned char)0x3B);
1580   emit_operand(dst, src);
1581 }
1582 
1583 void Assembler::cmpw(Address dst, int imm16) {
1584   InstructionMark im(this);
1585   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1586   emit_int8(0x66);
1587   emit_int8((unsigned char)0x81);
1588   emit_operand(rdi, dst, 2);
1589   emit_int16(imm16);
1590 }
1591 
1592 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
1593 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1594 // The ZF is set if the compared values were equal, and cleared otherwise.
1595 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1596   InstructionMark im(this);
1597   prefix(adr, reg);
1598   emit_int8(0x0F);
1599   emit_int8((unsigned char)0xB1);
1600   emit_operand(reg, adr);
1601 }
1602 
1603 // The 8-bit cmpxchg compares the value at adr with the contents of rax,
1604 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1605 // The ZF is set if the compared values were equal, and cleared otherwise.
1606 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1607   InstructionMark im(this);
1608   prefix(adr, reg, true);
1609   emit_int8(0x0F);
1610   emit_int8((unsigned char)0xB0);
1611   emit_operand(reg, adr);
1612 }
1613 
1614 void Assembler::comisd(XMMRegister dst, Address src) {
1615   // NOTE: dbx seems to decode this as comiss even though the
1616   // 0x66 is there. Strangly ucomisd comes out correct
1617   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1618   InstructionMark im(this);
1619   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);;
1620   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1621   attributes.set_rex_vex_w_reverted();
1622   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1623   emit_int8(0x2F);
1624   emit_operand(dst, src);
1625 }
1626 
1627 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1628   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1629   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1630   attributes.set_rex_vex_w_reverted();
1631   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1632   emit_int8(0x2F);
1633   emit_int8((unsigned char)(0xC0 | encode));
1634 }
1635 
1636 void Assembler::comiss(XMMRegister dst, Address src) {
1637   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1638   InstructionMark im(this);
1639   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1640   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1641   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1642   emit_int8(0x2F);
1643   emit_operand(dst, src);
1644 }
1645 
1646 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1647   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1648   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1649   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1650   emit_int8(0x2F);
1651   emit_int8((unsigned char)(0xC0 | encode));
1652 }
1653 
1654 void Assembler::cpuid() {
1655   emit_int8(0x0F);
1656   emit_int8((unsigned char)0xA2);
1657 }
1658 
1659 // Opcode / Instruction                      Op /  En  64 - Bit Mode     Compat / Leg Mode Description                  Implemented
1660 // F2 0F 38 F0 / r       CRC32 r32, r / m8   RM        Valid             Valid             Accumulate CRC32 on r / m8.  v
1661 // F2 REX 0F 38 F0 / r   CRC32 r32, r / m8*  RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1662 // F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8   RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1663 //
1664 // F2 0F 38 F1 / r       CRC32 r32, r / m16  RM        Valid             Valid             Accumulate CRC32 on r / m16. v
1665 //
1666 // F2 0F 38 F1 / r       CRC32 r32, r / m32  RM        Valid             Valid             Accumulate CRC32 on r / m32. v
1667 //
1668 // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64  RM        Valid             N.E.              Accumulate CRC32 on r / m64. v
1669 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1670   assert(VM_Version::supports_sse4_2(), "");
1671   int8_t w = 0x01;
1672   Prefix p = Prefix_EMPTY;
1673 
1674   emit_int8((int8_t)0xF2);
1675   switch (sizeInBytes) {
1676   case 1:
1677     w = 0;
1678     break;
1679   case 2:
1680   case 4:
1681     break;
1682   LP64_ONLY(case 8:)
1683     // This instruction is not valid in 32 bits
1684     // Note:
1685     // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1686     //
1687     // Page B - 72   Vol. 2C says
1688     // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
1689     // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
1690     //                                                                            F0!!!
1691     // while 3 - 208 Vol. 2A
1692     // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.Accumulate CRC32 on r / m64.
1693     //
1694     // the 0 on a last bit is reserved for a different flavor of this instruction :
1695     // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.Accumulate CRC32 on r / m8.
1696     p = REX_W;
1697     break;
1698   default:
1699     assert(0, "Unsupported value for a sizeInBytes argument");
1700     break;
1701   }
1702   LP64_ONLY(prefix(crc, v, p);)
1703   emit_int8((int8_t)0x0F);
1704   emit_int8(0x38);
1705   emit_int8((int8_t)(0xF0 | w));
1706   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1707 }
1708 
1709 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1710   assert(VM_Version::supports_sse4_2(), "");
1711   InstructionMark im(this);
1712   int8_t w = 0x01;
1713   Prefix p = Prefix_EMPTY;
1714 
1715   emit_int8((int8_t)0xF2);
1716   switch (sizeInBytes) {
1717   case 1:
1718     w = 0;
1719     break;
1720   case 2:
1721   case 4:
1722     break;
1723   LP64_ONLY(case 8:)
1724     // This instruction is not valid in 32 bits
1725     p = REX_W;
1726     break;
1727   default:
1728     assert(0, "Unsupported value for a sizeInBytes argument");
1729     break;
1730   }
1731   LP64_ONLY(prefix(crc, adr, p);)
1732   emit_int8((int8_t)0x0F);
1733   emit_int8(0x38);
1734   emit_int8((int8_t)(0xF0 | w));
1735   emit_operand(crc, adr);
1736 }
1737 
1738 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1739   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1740   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1741   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1742   emit_int8((unsigned char)0xE6);
1743   emit_int8((unsigned char)(0xC0 | encode));
1744 }
1745 
1746 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1747   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1748   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1749   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1750   emit_int8(0x5B);
1751   emit_int8((unsigned char)(0xC0 | encode));
1752 }
1753 
1754 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1755   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1756   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1757   attributes.set_rex_vex_w_reverted();
1758   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1759   emit_int8(0x5A);
1760   emit_int8((unsigned char)(0xC0 | encode));
1761 }
1762 
1763 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1764   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1765   InstructionMark im(this);
1766   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1767   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1768   attributes.set_rex_vex_w_reverted();
1769   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1770   emit_int8(0x5A);
1771   emit_operand(dst, src);
1772 }
1773 
1774 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1775   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1776   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1777   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1778   emit_int8(0x2A);
1779   emit_int8((unsigned char)(0xC0 | encode));
1780 }
1781 
1782 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1783   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1784   InstructionMark im(this);
1785   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1786   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1787   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1788   emit_int8(0x2A);
1789   emit_operand(dst, src);
1790 }
1791 
1792 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1793   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1794   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1795   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1796   emit_int8(0x2A);
1797   emit_int8((unsigned char)(0xC0 | encode));
1798 }
1799 
1800 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1801   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1802   InstructionMark im(this);
1803   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1804   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1805   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1806   emit_int8(0x2A);
1807   emit_operand(dst, src);
1808 }
1809 
1810 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1811   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1812   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1813   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1814   emit_int8(0x2A);
1815   emit_int8((unsigned char)(0xC0 | encode));
1816 }
1817 
1818 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1819   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1820   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1821   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1822   emit_int8(0x5A);
1823   emit_int8((unsigned char)(0xC0 | encode));
1824 }
1825 
1826 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1827   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1828   InstructionMark im(this);
1829   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1830   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1831   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1832   emit_int8(0x5A);
1833   emit_operand(dst, src);
1834 }
1835 
1836 
1837 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1838   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1839   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1840   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1841   emit_int8(0x2C);
1842   emit_int8((unsigned char)(0xC0 | encode));
1843 }
1844 
1845 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1846   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1847   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1848   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1849   emit_int8(0x2C);
1850   emit_int8((unsigned char)(0xC0 | encode));
1851 }
1852 
1853 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1854   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1855   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1856   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1857   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1858   emit_int8((unsigned char)0xE6);
1859   emit_int8((unsigned char)(0xC0 | encode));
1860 }
1861 
1862 void Assembler::decl(Address dst) {
1863   // Don't use it directly. Use MacroAssembler::decrement() instead.
1864   InstructionMark im(this);
1865   prefix(dst);
1866   emit_int8((unsigned char)0xFF);
1867   emit_operand(rcx, dst);
1868 }
1869 
1870 void Assembler::divsd(XMMRegister dst, Address src) {
1871   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1872   InstructionMark im(this);
1873   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1874   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1875   attributes.set_rex_vex_w_reverted();
1876   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1877   emit_int8(0x5E);
1878   emit_operand(dst, src);
1879 }
1880 
1881 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1882   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1883   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1884   attributes.set_rex_vex_w_reverted();
1885   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1886   emit_int8(0x5E);
1887   emit_int8((unsigned char)(0xC0 | encode));
1888 }
1889 
1890 void Assembler::divss(XMMRegister dst, Address src) {
1891   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1892   InstructionMark im(this);
1893   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1894   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1895   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1896   emit_int8(0x5E);
1897   emit_operand(dst, src);
1898 }
1899 
1900 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1901   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1902   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
1903   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1904   emit_int8(0x5E);
1905   emit_int8((unsigned char)(0xC0 | encode));
1906 }
1907 
1908 void Assembler::emms() {
1909   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1910   emit_int8(0x0F);
1911   emit_int8(0x77);
1912 }
1913 
1914 void Assembler::hlt() {
1915   emit_int8((unsigned char)0xF4);
1916 }
1917 
1918 void Assembler::idivl(Register src) {
1919   int encode = prefix_and_encode(src->encoding());
1920   emit_int8((unsigned char)0xF7);
1921   emit_int8((unsigned char)(0xF8 | encode));
1922 }
1923 
1924 void Assembler::divl(Register src) { // Unsigned
1925   int encode = prefix_and_encode(src->encoding());
1926   emit_int8((unsigned char)0xF7);
1927   emit_int8((unsigned char)(0xF0 | encode));
1928 }
1929 
1930 void Assembler::imull(Register src) {
1931   int encode = prefix_and_encode(src->encoding());
1932   emit_int8((unsigned char)0xF7);
1933   emit_int8((unsigned char)(0xE8 | encode));
1934 }
1935 
1936 void Assembler::imull(Register dst, Register src) {
1937   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1938   emit_int8(0x0F);
1939   emit_int8((unsigned char)0xAF);
1940   emit_int8((unsigned char)(0xC0 | encode));
1941 }
1942 
1943 
1944 void Assembler::imull(Register dst, Register src, int value) {
1945   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1946   if (is8bit(value)) {
1947     emit_int8(0x6B);
1948     emit_int8((unsigned char)(0xC0 | encode));
1949     emit_int8(value & 0xFF);
1950   } else {
1951     emit_int8(0x69);
1952     emit_int8((unsigned char)(0xC0 | encode));
1953     emit_int32(value);
1954   }
1955 }
1956 
1957 void Assembler::imull(Register dst, Address src) {
1958   InstructionMark im(this);
1959   prefix(src, dst);
1960   emit_int8(0x0F);
1961   emit_int8((unsigned char) 0xAF);
1962   emit_operand(dst, src);
1963 }
1964 
1965 
1966 void Assembler::incl(Address dst) {
1967   // Don't use it directly. Use MacroAssembler::increment() instead.
1968   InstructionMark im(this);
1969   prefix(dst);
1970   emit_int8((unsigned char)0xFF);
1971   emit_operand(rax, dst);
1972 }
1973 
1974 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1975   InstructionMark im(this);
1976   assert((0 <= cc) && (cc < 16), "illegal cc");
1977   if (L.is_bound()) {
1978     address dst = target(L);
1979     assert(dst != NULL, "jcc most probably wrong");
1980 
1981     const int short_size = 2;
1982     const int long_size = 6;
1983     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1984     if (maybe_short && is8bit(offs - short_size)) {
1985       // 0111 tttn #8-bit disp
1986       emit_int8(0x70 | cc);
1987       emit_int8((offs - short_size) & 0xFF);
1988     } else {
1989       // 0000 1111 1000 tttn #32-bit disp
1990       assert(is_simm32(offs - long_size),
1991              "must be 32bit offset (call4)");
1992       emit_int8(0x0F);
1993       emit_int8((unsigned char)(0x80 | cc));
1994       emit_int32(offs - long_size);
1995     }
1996   } else {
1997     // Note: could eliminate cond. jumps to this jump if condition
1998     //       is the same however, seems to be rather unlikely case.
1999     // Note: use jccb() if label to be bound is very close to get
2000     //       an 8-bit displacement
2001     L.add_patch_at(code(), locator());
2002     emit_int8(0x0F);
2003     emit_int8((unsigned char)(0x80 | cc));
2004     emit_int32(0);
2005   }
2006 }
2007 
2008 void Assembler::jccb(Condition cc, Label& L) {
2009   if (L.is_bound()) {
2010     const int short_size = 2;
2011     address entry = target(L);
2012 #ifdef ASSERT
2013     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2014     intptr_t delta = short_branch_delta();
2015     if (delta != 0) {
2016       dist += (dist < 0 ? (-delta) :delta);
2017     }
2018     assert(is8bit(dist), "Dispacement too large for a short jmp");
2019 #endif
2020     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2021     // 0111 tttn #8-bit disp
2022     emit_int8(0x70 | cc);
2023     emit_int8((offs - short_size) & 0xFF);
2024   } else {
2025     InstructionMark im(this);
2026     L.add_patch_at(code(), locator());
2027     emit_int8(0x70 | cc);
2028     emit_int8(0);
2029   }
2030 }
2031 
2032 void Assembler::jmp(Address adr) {
2033   InstructionMark im(this);
2034   prefix(adr);
2035   emit_int8((unsigned char)0xFF);
2036   emit_operand(rsp, adr);
2037 }
2038 
2039 void Assembler::jmp(Label& L, bool maybe_short) {
2040   if (L.is_bound()) {
2041     address entry = target(L);
2042     assert(entry != NULL, "jmp most probably wrong");
2043     InstructionMark im(this);
2044     const int short_size = 2;
2045     const int long_size = 5;
2046     intptr_t offs = entry - pc();
2047     if (maybe_short && is8bit(offs - short_size)) {
2048       emit_int8((unsigned char)0xEB);
2049       emit_int8((offs - short_size) & 0xFF);
2050     } else {
2051       emit_int8((unsigned char)0xE9);
2052       emit_int32(offs - long_size);
2053     }
2054   } else {
2055     // By default, forward jumps are always 32-bit displacements, since
2056     // we can't yet know where the label will be bound.  If you're sure that
2057     // the forward jump will not run beyond 256 bytes, use jmpb to
2058     // force an 8-bit displacement.
2059     InstructionMark im(this);
2060     L.add_patch_at(code(), locator());
2061     emit_int8((unsigned char)0xE9);
2062     emit_int32(0);
2063   }
2064 }
2065 
2066 void Assembler::jmp(Register entry) {
2067   int encode = prefix_and_encode(entry->encoding());
2068   emit_int8((unsigned char)0xFF);
2069   emit_int8((unsigned char)(0xE0 | encode));
2070 }
2071 
2072 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2073   InstructionMark im(this);
2074   emit_int8((unsigned char)0xE9);
2075   assert(dest != NULL, "must have a target");
2076   intptr_t disp = dest - (pc() + sizeof(int32_t));
2077   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2078   emit_data(disp, rspec.reloc(), call32_operand);
2079 }
2080 
2081 void Assembler::jmpb(Label& L) {
2082   if (L.is_bound()) {
2083     const int short_size = 2;
2084     address entry = target(L);
2085     assert(entry != NULL, "jmp most probably wrong");
2086 #ifdef ASSERT
2087     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2088     intptr_t delta = short_branch_delta();
2089     if (delta != 0) {
2090       dist += (dist < 0 ? (-delta) :delta);
2091     }
2092     assert(is8bit(dist), "Dispacement too large for a short jmp");
2093 #endif
2094     intptr_t offs = entry - pc();
2095     emit_int8((unsigned char)0xEB);
2096     emit_int8((offs - short_size) & 0xFF);
2097   } else {
2098     InstructionMark im(this);
2099     L.add_patch_at(code(), locator());
2100     emit_int8((unsigned char)0xEB);
2101     emit_int8(0);
2102   }
2103 }
2104 
2105 void Assembler::ldmxcsr( Address src) {
2106   if (UseAVX > 0 ) {
2107     InstructionMark im(this);
2108     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
2109     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2110     emit_int8((unsigned char)0xAE);
2111     emit_operand(as_Register(2), src);
2112   } else {
2113     NOT_LP64(assert(VM_Version::supports_sse(), ""));
2114     InstructionMark im(this);
2115     prefix(src);
2116     emit_int8(0x0F);
2117     emit_int8((unsigned char)0xAE);
2118     emit_operand(as_Register(2), src);
2119   }
2120 }
2121 
2122 void Assembler::leal(Register dst, Address src) {
2123   InstructionMark im(this);
2124 #ifdef _LP64
2125   emit_int8(0x67); // addr32
2126   prefix(src, dst);
2127 #endif // LP64
2128   emit_int8((unsigned char)0x8D);
2129   emit_operand(dst, src);
2130 }
2131 
2132 void Assembler::lfence() {
2133   emit_int8(0x0F);
2134   emit_int8((unsigned char)0xAE);
2135   emit_int8((unsigned char)0xE8);
2136 }
2137 
2138 void Assembler::lock() {
2139   emit_int8((unsigned char)0xF0);
2140 }
2141 
2142 void Assembler::lzcntl(Register dst, Register src) {
2143   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2144   emit_int8((unsigned char)0xF3);
2145   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2146   emit_int8(0x0F);
2147   emit_int8((unsigned char)0xBD);
2148   emit_int8((unsigned char)(0xC0 | encode));
2149 }
2150 
2151 // Emit mfence instruction
2152 void Assembler::mfence() {
2153   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2154   emit_int8(0x0F);
2155   emit_int8((unsigned char)0xAE);
2156   emit_int8((unsigned char)0xF0);
2157 }
2158 
2159 void Assembler::mov(Register dst, Register src) {
2160   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2161 }
2162 
2163 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2164   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2165   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2166   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2167   attributes.set_rex_vex_w_reverted();
2168   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2169   emit_int8(0x28);
2170   emit_int8((unsigned char)(0xC0 | encode));
2171 }
2172 
2173 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2174   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2175   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2176   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2177   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2178   emit_int8(0x28);
2179   emit_int8((unsigned char)(0xC0 | encode));
2180 }
2181 
2182 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2183   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2184   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2185   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2186   emit_int8(0x16);
2187   emit_int8((unsigned char)(0xC0 | encode));
2188 }
2189 
2190 void Assembler::movb(Register dst, Address src) {
2191   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2192   InstructionMark im(this);
2193   prefix(src, dst, true);
2194   emit_int8((unsigned char)0x8A);
2195   emit_operand(dst, src);
2196 }
2197 
2198 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2199   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2200   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2201   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2202   attributes.set_rex_vex_w_reverted();
2203   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2204   emit_int8(0x12);
2205   emit_int8(0xC0 | encode);
2206 }
2207 
2208 void Assembler::kmovbl(KRegister dst, Register src) {
2209   assert(VM_Version::supports_avx512dq(), "");
2210   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2211   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2212   emit_int8((unsigned char)0x92);
2213   emit_int8((unsigned char)(0xC0 | encode));
2214 }
2215 
2216 void Assembler::kmovbl(Register dst, KRegister src) {
2217   assert(VM_Version::supports_avx512dq(), "");
2218   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2219   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2220   emit_int8((unsigned char)0x93);
2221   emit_int8((unsigned char)(0xC0 | encode));
2222 }
2223 
2224 void Assembler::kmovwl(KRegister dst, Register src) {
2225   assert(VM_Version::supports_evex(), "");
2226   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2227   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2228   emit_int8((unsigned char)0x92);
2229   emit_int8((unsigned char)(0xC0 | encode));
2230 }
2231 
2232 void Assembler::kmovwl(Register dst, KRegister src) {
2233   assert(VM_Version::supports_evex(), "");
2234   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2235   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2236   emit_int8((unsigned char)0x93);
2237   emit_int8((unsigned char)(0xC0 | encode));
2238 }
2239 
2240 void Assembler::kmovwl(KRegister dst, Address src) {
2241   assert(VM_Version::supports_evex(), "");
2242   InstructionMark im(this);
2243   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2244   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2245   emit_int8((unsigned char)0x90);
2246   emit_operand((Register)dst, src);
2247 }
2248 
2249 void Assembler::kmovdl(KRegister dst, Register src) {
2250   assert(VM_Version::supports_avx512bw(), "");
2251   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2252   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2253   emit_int8((unsigned char)0x92);
2254   emit_int8((unsigned char)(0xC0 | encode));
2255 }
2256 
2257 void Assembler::kmovdl(Register dst, KRegister src) {
2258   assert(VM_Version::supports_avx512bw(), "");
2259   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2260   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2261   emit_int8((unsigned char)0x93);
2262   emit_int8((unsigned char)(0xC0 | encode));
2263 }
2264 
2265 void Assembler::kmovql(KRegister dst, KRegister src) {
2266   assert(VM_Version::supports_avx512bw(), "");
2267   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2268   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2269   emit_int8((unsigned char)0x90);
2270   emit_int8((unsigned char)(0xC0 | encode));
2271 }
2272 
2273 void Assembler::kmovql(KRegister dst, Address src) {
2274   assert(VM_Version::supports_avx512bw(), "");
2275   InstructionMark im(this);
2276   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2277   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2278   emit_int8((unsigned char)0x90);
2279   emit_operand((Register)dst, src);
2280 }
2281 
2282 void Assembler::kmovql(Address dst, KRegister src) {
2283   assert(VM_Version::supports_avx512bw(), "");
2284   InstructionMark im(this);
2285   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2286   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2287   emit_int8((unsigned char)0x90);
2288   emit_operand((Register)src, dst);
2289 }
2290 
2291 void Assembler::kmovql(KRegister dst, Register src) {
2292   assert(VM_Version::supports_avx512bw(), "");
2293   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2294   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2295   emit_int8((unsigned char)0x92);
2296   emit_int8((unsigned char)(0xC0 | encode));
2297 }
2298 
2299 void Assembler::kmovql(Register dst, KRegister src) {
2300   assert(VM_Version::supports_avx512bw(), "");
2301   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2302   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2303   emit_int8((unsigned char)0x93);
2304   emit_int8((unsigned char)(0xC0 | encode));
2305 }
2306 
2307 void Assembler::knotwl(KRegister dst, KRegister src) {
2308   assert(VM_Version::supports_evex(), "");
2309   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2310   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2311   emit_int8((unsigned char)0x44);
2312   emit_int8((unsigned char)(0xC0 | encode));
2313 }
2314 
2315 // This instruction produces ZF or CF flags
2316 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2317   assert(VM_Version::supports_avx512dq(), "");
2318   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2319   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2320   emit_int8((unsigned char)0x98);
2321   emit_int8((unsigned char)(0xC0 | encode));
2322 }
2323 
2324 // This instruction produces ZF or CF flags
2325 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2326   assert(VM_Version::supports_evex(), "");
2327   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2328   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2329   emit_int8((unsigned char)0x98);
2330   emit_int8((unsigned char)(0xC0 | encode));
2331 }
2332 
2333 // This instruction produces ZF or CF flags
2334 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2335   assert(VM_Version::supports_avx512bw(), "");
2336   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2337   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2338   emit_int8((unsigned char)0x98);
2339   emit_int8((unsigned char)(0xC0 | encode));
2340 }
2341 
2342 // This instruction produces ZF or CF flags
2343 void Assembler::kortestql(KRegister src1, KRegister src2) {
2344   assert(VM_Version::supports_avx512bw(), "");
2345   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2346   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2347   emit_int8((unsigned char)0x98);
2348   emit_int8((unsigned char)(0xC0 | encode));
2349 }
2350 
2351 // This instruction produces ZF or CF flags
2352 void Assembler::ktestql(KRegister src1, KRegister src2) {
2353   assert(VM_Version::supports_avx512bw(), "");
2354   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2355   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2356   emit_int8((unsigned char)0x99);
2357   emit_int8((unsigned char)(0xC0 | encode));
2358 }
2359 
2360 void Assembler::ktestq(KRegister src1, KRegister src2) {
2361   assert(VM_Version::supports_avx512bw(), "");
2362   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2363   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2364   emit_int8((unsigned char)0x99);
2365   emit_int8((unsigned char)(0xC0 | encode));
2366 }
2367 
2368 void Assembler::ktestd(KRegister src1, KRegister src2) {
2369   assert(VM_Version::supports_avx512bw(), "");
2370   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2371   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2372   emit_int8((unsigned char)0x99);
2373   emit_int8((unsigned char)(0xC0 | encode));
2374 }
2375 
2376 void Assembler::movb(Address dst, int imm8) {
2377   InstructionMark im(this);
2378    prefix(dst);
2379   emit_int8((unsigned char)0xC6);
2380   emit_operand(rax, dst, 1);
2381   emit_int8(imm8);
2382 }
2383 
2384 
2385 void Assembler::movb(Address dst, Register src) {
2386   assert(src->has_byte_register(), "must have byte register");
2387   InstructionMark im(this);
2388   prefix(dst, src, true);
2389   emit_int8((unsigned char)0x88);
2390   emit_operand(src, dst);
2391 }
2392 
2393 void Assembler::movdl(XMMRegister dst, Register src) {
2394   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2395   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2396   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2397   emit_int8(0x6E);
2398   emit_int8((unsigned char)(0xC0 | encode));
2399 }
2400 
2401 void Assembler::movdl(Register dst, XMMRegister src) {
2402   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2403   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2404   // swap src/dst to get correct prefix
2405   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2406   emit_int8(0x7E);
2407   emit_int8((unsigned char)(0xC0 | encode));
2408 }
2409 
2410 void Assembler::movdl(XMMRegister dst, Address src) {
2411   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2412   InstructionMark im(this);
2413   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2414   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2415   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2416   emit_int8(0x6E);
2417   emit_operand(dst, src);
2418 }
2419 
2420 void Assembler::movdl(Address dst, XMMRegister src) {
2421   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2422   InstructionMark im(this);
2423   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2424   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2425   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2426   emit_int8(0x7E);
2427   emit_operand(src, dst);
2428 }
2429 
2430 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2431   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2432   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2433   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2434   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2435   emit_int8(0x6F);
2436   emit_int8((unsigned char)(0xC0 | encode));
2437 }
2438 
2439 void Assembler::movdqa(XMMRegister dst, Address src) {
2440   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2441   InstructionMark im(this);
2442   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2443   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2444   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2445   emit_int8(0x6F);
2446   emit_operand(dst, src);
2447 }
2448 
2449 void Assembler::movdqu(XMMRegister dst, Address src) {
2450   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2451   InstructionMark im(this);
2452   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2453   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2454   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2455   emit_int8(0x6F);
2456   emit_operand(dst, src);
2457 }
2458 
2459 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2460   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2461   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2462   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2463   emit_int8(0x6F);
2464   emit_int8((unsigned char)(0xC0 | encode));
2465 }
2466 
2467 void Assembler::movdqu(Address dst, XMMRegister src) {
2468   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2469   InstructionMark im(this);
2470   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2471   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2472   attributes.reset_is_clear_context();
2473   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2474   emit_int8(0x7F);
2475   emit_operand(src, dst);
2476 }
2477 
2478 // Move Unaligned 256bit Vector
2479 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2480   assert(UseAVX > 0, "");
2481   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2482   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2483   emit_int8(0x6F);
2484   emit_int8((unsigned char)(0xC0 | encode));
2485 }
2486 
2487 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2488   assert(UseAVX > 0, "");
2489   InstructionMark im(this);
2490   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2491   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2492   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2493   emit_int8(0x6F);
2494   emit_operand(dst, src);
2495 }
2496 
2497 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2498   assert(UseAVX > 0, "");
2499   InstructionMark im(this);
2500   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2501   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2502   attributes.reset_is_clear_context();
2503   // swap src<->dst for encoding
2504   assert(src != xnoreg, "sanity");
2505   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2506   emit_int8(0x7F);
2507   emit_operand(src, dst);
2508 }
2509 
2510 // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
2511 void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
2512   assert(VM_Version::supports_evex(), "");
2513   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2514   attributes.set_is_evex_instruction();
2515   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2516   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2517   emit_int8(0x6F);
2518   emit_int8((unsigned char)(0xC0 | encode));
2519 }
2520 
2521 void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
2522   assert(VM_Version::supports_evex(), "");
2523   InstructionMark im(this);
2524   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2525   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2526   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2527   attributes.set_is_evex_instruction();
2528   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2529   emit_int8(0x6F);
2530   emit_operand(dst, src);
2531 }
2532 
2533 void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
2534   assert(VM_Version::supports_evex(), "");
2535   assert(src != xnoreg, "sanity");
2536   InstructionMark im(this);
2537   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2538   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2539   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2540   attributes.set_is_evex_instruction();
2541   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2542   emit_int8(0x7F);
2543   emit_operand(src, dst);
2544 }
2545 
2546 void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2547   assert(VM_Version::supports_avx512vlbw(), "");
2548   assert(is_vector_masking(), "");    // For stub code use only
2549   InstructionMark im(this);
2550   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2551   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2552   attributes.set_embedded_opmask_register_specifier(mask);
2553   attributes.set_is_evex_instruction();
2554   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2555   emit_int8(0x6F);
2556   emit_operand(dst, src);
2557 }
2558 
2559 void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
2560   assert(VM_Version::supports_evex(), "");
2561   InstructionMark im(this);
2562   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2563   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2564   attributes.set_is_evex_instruction();
2565   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2566   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2567   emit_int8(0x6F);
2568   emit_operand(dst, src);
2569 }
2570 
2571 void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2572   assert(is_vector_masking(), "");
2573   assert(VM_Version::supports_avx512vlbw(), "");
2574   InstructionMark im(this);
2575   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2576   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2577   attributes.set_embedded_opmask_register_specifier(mask);
2578   attributes.set_is_evex_instruction();
2579   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2580   emit_int8(0x6F);
2581   emit_operand(dst, src);
2582 }
2583 
2584 void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
2585   assert(VM_Version::supports_evex(), "");
2586   assert(src != xnoreg, "sanity");
2587   InstructionMark im(this);
2588   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2589   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2590   attributes.set_is_evex_instruction();
2591   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2592   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2593   emit_int8(0x7F);
2594   emit_operand(src, dst);
2595 }
2596 
2597 void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
2598   assert(VM_Version::supports_avx512vlbw(), "");
2599   assert(src != xnoreg, "sanity");
2600   InstructionMark im(this);
2601   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2602   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2603   attributes.reset_is_clear_context();
2604   attributes.set_embedded_opmask_register_specifier(mask);
2605   attributes.set_is_evex_instruction();
2606   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2607   emit_int8(0x7F);
2608   emit_operand(src, dst);
2609 }
2610 
2611 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2612   assert(VM_Version::supports_evex(), "");
2613   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2614   attributes.set_is_evex_instruction();
2615   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2616   emit_int8(0x6F);
2617   emit_int8((unsigned char)(0xC0 | encode));
2618 }
2619 
2620 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2621   assert(VM_Version::supports_evex(), "");
2622   InstructionMark im(this);
2623   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
2624   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2625   attributes.set_is_evex_instruction();
2626   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2627   emit_int8(0x6F);
2628   emit_operand(dst, src);
2629 }
2630 
2631 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2632   assert(VM_Version::supports_evex(), "");
2633   assert(src != xnoreg, "sanity");
2634   InstructionMark im(this);
2635   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2636   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2637   attributes.reset_is_clear_context();
2638   attributes.set_is_evex_instruction();
2639   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2640   emit_int8(0x7F);
2641   emit_operand(src, dst);
2642 }
2643 
2644 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2645   assert(VM_Version::supports_evex(), "");
2646   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2647   attributes.set_is_evex_instruction();
2648   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2649   emit_int8(0x6F);
2650   emit_int8((unsigned char)(0xC0 | encode));
2651 }
2652 
2653 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2654   assert(VM_Version::supports_evex(), "");
2655   InstructionMark im(this);
2656   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2657   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2658   attributes.set_is_evex_instruction();
2659   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2660   emit_int8(0x6F);
2661   emit_operand(dst, src);
2662 }
2663 
2664 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2665   assert(VM_Version::supports_evex(), "");
2666   assert(src != xnoreg, "sanity");
2667   InstructionMark im(this);
2668   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2669   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2670   attributes.reset_is_clear_context();
2671   attributes.set_is_evex_instruction();
2672   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2673   emit_int8(0x7F);
2674   emit_operand(src, dst);
2675 }
2676 
2677 // Uses zero extension on 64bit
2678 
2679 void Assembler::movl(Register dst, int32_t imm32) {
2680   int encode = prefix_and_encode(dst->encoding());
2681   emit_int8((unsigned char)(0xB8 | encode));
2682   emit_int32(imm32);
2683 }
2684 
2685 void Assembler::movl(Register dst, Register src) {
2686   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2687   emit_int8((unsigned char)0x8B);
2688   emit_int8((unsigned char)(0xC0 | encode));
2689 }
2690 
2691 void Assembler::movl(Register dst, Address src) {
2692   InstructionMark im(this);
2693   prefix(src, dst);
2694   emit_int8((unsigned char)0x8B);
2695   emit_operand(dst, src);
2696 }
2697 
2698 void Assembler::movl(Address dst, int32_t imm32) {
2699   InstructionMark im(this);
2700   prefix(dst);
2701   emit_int8((unsigned char)0xC7);
2702   emit_operand(rax, dst, 4);
2703   emit_int32(imm32);
2704 }
2705 
2706 void Assembler::movl(Address dst, Register src) {
2707   InstructionMark im(this);
2708   prefix(dst, src);
2709   emit_int8((unsigned char)0x89);
2710   emit_operand(src, dst);
2711 }
2712 
2713 // New cpus require to use movsd and movss to avoid partial register stall
2714 // when loading from memory. But for old Opteron use movlpd instead of movsd.
2715 // The selection is done in MacroAssembler::movdbl() and movflt().
2716 void Assembler::movlpd(XMMRegister dst, Address src) {
2717   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2718   InstructionMark im(this);
2719   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2720   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2721   attributes.set_rex_vex_w_reverted();
2722   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2723   emit_int8(0x12);
2724   emit_operand(dst, src);
2725 }
2726 
2727 void Assembler::movq( MMXRegister dst, Address src ) {
2728   assert( VM_Version::supports_mmx(), "" );
2729   emit_int8(0x0F);
2730   emit_int8(0x6F);
2731   emit_operand(dst, src);
2732 }
2733 
2734 void Assembler::movq( Address dst, MMXRegister src ) {
2735   assert( VM_Version::supports_mmx(), "" );
2736   emit_int8(0x0F);
2737   emit_int8(0x7F);
2738   // workaround gcc (3.2.1-7a) bug
2739   // In that version of gcc with only an emit_operand(MMX, Address)
2740   // gcc will tail jump and try and reverse the parameters completely
2741   // obliterating dst in the process. By having a version available
2742   // that doesn't need to swap the args at the tail jump the bug is
2743   // avoided.
2744   emit_operand(dst, src);
2745 }
2746 
2747 void Assembler::movq(XMMRegister dst, Address src) {
2748   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2749   InstructionMark im(this);
2750   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2751   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2752   attributes.set_rex_vex_w_reverted();
2753   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2754   emit_int8(0x7E);
2755   emit_operand(dst, src);
2756 }
2757 
2758 void Assembler::movq(Address dst, XMMRegister src) {
2759   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2760   InstructionMark im(this);
2761   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2762   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2763   attributes.set_rex_vex_w_reverted();
2764   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2765   emit_int8((unsigned char)0xD6);
2766   emit_operand(src, dst);
2767 }
2768 
2769 void Assembler::movsbl(Register dst, Address src) { // movsxb
2770   InstructionMark im(this);
2771   prefix(src, dst);
2772   emit_int8(0x0F);
2773   emit_int8((unsigned char)0xBE);
2774   emit_operand(dst, src);
2775 }
2776 
2777 void Assembler::movsbl(Register dst, Register src) { // movsxb
2778   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2779   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2780   emit_int8(0x0F);
2781   emit_int8((unsigned char)0xBE);
2782   emit_int8((unsigned char)(0xC0 | encode));
2783 }
2784 
2785 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2786   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2787   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2788   attributes.set_rex_vex_w_reverted();
2789   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2790   emit_int8(0x10);
2791   emit_int8((unsigned char)(0xC0 | encode));
2792 }
2793 
2794 void Assembler::movsd(XMMRegister dst, Address src) {
2795   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2796   InstructionMark im(this);
2797   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2798   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2799   attributes.set_rex_vex_w_reverted();
2800   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2801   emit_int8(0x10);
2802   emit_operand(dst, src);
2803 }
2804 
2805 void Assembler::movsd(Address dst, XMMRegister src) {
2806   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2807   InstructionMark im(this);
2808   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2809   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2810   attributes.reset_is_clear_context();
2811   attributes.set_rex_vex_w_reverted();
2812   simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2813   emit_int8(0x11);
2814   emit_operand(src, dst);
2815 }
2816 
2817 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2818   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2819   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2820   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2821   emit_int8(0x10);
2822   emit_int8((unsigned char)(0xC0 | encode));
2823 }
2824 
2825 void Assembler::movss(XMMRegister dst, Address src) {
2826   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2827   InstructionMark im(this);
2828   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2829   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2830   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2831   emit_int8(0x10);
2832   emit_operand(dst, src);
2833 }
2834 
2835 void Assembler::movss(Address dst, XMMRegister src) {
2836   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2837   InstructionMark im(this);
2838   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2839   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2840   attributes.reset_is_clear_context();
2841   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2842   emit_int8(0x11);
2843   emit_operand(src, dst);
2844 }
2845 
2846 void Assembler::movswl(Register dst, Address src) { // movsxw
2847   InstructionMark im(this);
2848   prefix(src, dst);
2849   emit_int8(0x0F);
2850   emit_int8((unsigned char)0xBF);
2851   emit_operand(dst, src);
2852 }
2853 
2854 void Assembler::movswl(Register dst, Register src) { // movsxw
2855   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2856   emit_int8(0x0F);
2857   emit_int8((unsigned char)0xBF);
2858   emit_int8((unsigned char)(0xC0 | encode));
2859 }
2860 
2861 void Assembler::movw(Address dst, int imm16) {
2862   InstructionMark im(this);
2863 
2864   emit_int8(0x66); // switch to 16-bit mode
2865   prefix(dst);
2866   emit_int8((unsigned char)0xC7);
2867   emit_operand(rax, dst, 2);
2868   emit_int16(imm16);
2869 }
2870 
2871 void Assembler::movw(Register dst, Address src) {
2872   InstructionMark im(this);
2873   emit_int8(0x66);
2874   prefix(src, dst);
2875   emit_int8((unsigned char)0x8B);
2876   emit_operand(dst, src);
2877 }
2878 
2879 void Assembler::movw(Address dst, Register src) {
2880   InstructionMark im(this);
2881   emit_int8(0x66);
2882   prefix(dst, src);
2883   emit_int8((unsigned char)0x89);
2884   emit_operand(src, dst);
2885 }
2886 
2887 void Assembler::movzbl(Register dst, Address src) { // movzxb
2888   InstructionMark im(this);
2889   prefix(src, dst);
2890   emit_int8(0x0F);
2891   emit_int8((unsigned char)0xB6);
2892   emit_operand(dst, src);
2893 }
2894 
2895 void Assembler::movzbl(Register dst, Register src) { // movzxb
2896   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2897   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2898   emit_int8(0x0F);
2899   emit_int8((unsigned char)0xB6);
2900   emit_int8(0xC0 | encode);
2901 }
2902 
2903 void Assembler::movzwl(Register dst, Address src) { // movzxw
2904   InstructionMark im(this);
2905   prefix(src, dst);
2906   emit_int8(0x0F);
2907   emit_int8((unsigned char)0xB7);
2908   emit_operand(dst, src);
2909 }
2910 
2911 void Assembler::movzwl(Register dst, Register src) { // movzxw
2912   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2913   emit_int8(0x0F);
2914   emit_int8((unsigned char)0xB7);
2915   emit_int8(0xC0 | encode);
2916 }
2917 
2918 void Assembler::mull(Address src) {
2919   InstructionMark im(this);
2920   prefix(src);
2921   emit_int8((unsigned char)0xF7);
2922   emit_operand(rsp, src);
2923 }
2924 
2925 void Assembler::mull(Register src) {
2926   int encode = prefix_and_encode(src->encoding());
2927   emit_int8((unsigned char)0xF7);
2928   emit_int8((unsigned char)(0xE0 | encode));
2929 }
2930 
2931 void Assembler::mulsd(XMMRegister dst, Address src) {
2932   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2933   InstructionMark im(this);
2934   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2935   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2936   attributes.set_rex_vex_w_reverted();
2937   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2938   emit_int8(0x59);
2939   emit_operand(dst, src);
2940 }
2941 
2942 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2943   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2944   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2945   attributes.set_rex_vex_w_reverted();
2946   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2947   emit_int8(0x59);
2948   emit_int8((unsigned char)(0xC0 | encode));
2949 }
2950 
2951 void Assembler::mulss(XMMRegister dst, Address src) {
2952   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2953   InstructionMark im(this);
2954   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2955   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2956   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2957   emit_int8(0x59);
2958   emit_operand(dst, src);
2959 }
2960 
2961 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2962   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2963   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2964   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2965   emit_int8(0x59);
2966   emit_int8((unsigned char)(0xC0 | encode));
2967 }
2968 
2969 void Assembler::negl(Register dst) {
2970   int encode = prefix_and_encode(dst->encoding());
2971   emit_int8((unsigned char)0xF7);
2972   emit_int8((unsigned char)(0xD8 | encode));
2973 }
2974 
2975 void Assembler::nop(int i) {
2976 #ifdef ASSERT
2977   assert(i > 0, " ");
2978   // The fancy nops aren't currently recognized by debuggers making it a
2979   // pain to disassemble code while debugging. If asserts are on clearly
2980   // speed is not an issue so simply use the single byte traditional nop
2981   // to do alignment.
2982 
2983   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2984   return;
2985 
2986 #endif // ASSERT
2987 
2988   if (UseAddressNop && VM_Version::is_intel()) {
2989     //
2990     // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
2991     //  1: 0x90
2992     //  2: 0x66 0x90
2993     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2994     //  4: 0x0F 0x1F 0x40 0x00
2995     //  5: 0x0F 0x1F 0x44 0x00 0x00
2996     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2997     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2998     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2999     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3000     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3001     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3002 
3003     // The rest coding is Intel specific - don't use consecutive address nops
3004 
3005     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3006     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3007     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3008     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3009 
3010     while(i >= 15) {
3011       // For Intel don't generate consecutive addess nops (mix with regular nops)
3012       i -= 15;
3013       emit_int8(0x66);   // size prefix
3014       emit_int8(0x66);   // size prefix
3015       emit_int8(0x66);   // size prefix
3016       addr_nop_8();
3017       emit_int8(0x66);   // size prefix
3018       emit_int8(0x66);   // size prefix
3019       emit_int8(0x66);   // size prefix
3020       emit_int8((unsigned char)0x90);
3021                          // nop
3022     }
3023     switch (i) {
3024       case 14:
3025         emit_int8(0x66); // size prefix
3026       case 13:
3027         emit_int8(0x66); // size prefix
3028       case 12:
3029         addr_nop_8();
3030         emit_int8(0x66); // size prefix
3031         emit_int8(0x66); // size prefix
3032         emit_int8(0x66); // size prefix
3033         emit_int8((unsigned char)0x90);
3034                          // nop
3035         break;
3036       case 11:
3037         emit_int8(0x66); // size prefix
3038       case 10:
3039         emit_int8(0x66); // size prefix
3040       case 9:
3041         emit_int8(0x66); // size prefix
3042       case 8:
3043         addr_nop_8();
3044         break;
3045       case 7:
3046         addr_nop_7();
3047         break;
3048       case 6:
3049         emit_int8(0x66); // size prefix
3050       case 5:
3051         addr_nop_5();
3052         break;
3053       case 4:
3054         addr_nop_4();
3055         break;
3056       case 3:
3057         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3058         emit_int8(0x66); // size prefix
3059       case 2:
3060         emit_int8(0x66); // size prefix
3061       case 1:
3062         emit_int8((unsigned char)0x90);
3063                          // nop
3064         break;
3065       default:
3066         assert(i == 0, " ");
3067     }
3068     return;
3069   }
3070   if (UseAddressNop && VM_Version::is_amd()) {
3071     //
3072     // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
3073     //  1: 0x90
3074     //  2: 0x66 0x90
3075     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3076     //  4: 0x0F 0x1F 0x40 0x00
3077     //  5: 0x0F 0x1F 0x44 0x00 0x00
3078     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3079     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3080     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3081     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3082     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3083     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3084 
3085     // The rest coding is AMD specific - use consecutive address nops
3086 
3087     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3088     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3089     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3090     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3091     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3092     //     Size prefixes (0x66) are added for larger sizes
3093 
3094     while(i >= 22) {
3095       i -= 11;
3096       emit_int8(0x66); // size prefix
3097       emit_int8(0x66); // size prefix
3098       emit_int8(0x66); // size prefix
3099       addr_nop_8();
3100     }
3101     // Generate first nop for size between 21-12
3102     switch (i) {
3103       case 21:
3104         i -= 1;
3105         emit_int8(0x66); // size prefix
3106       case 20:
3107       case 19:
3108         i -= 1;
3109         emit_int8(0x66); // size prefix
3110       case 18:
3111       case 17:
3112         i -= 1;
3113         emit_int8(0x66); // size prefix
3114       case 16:
3115       case 15:
3116         i -= 8;
3117         addr_nop_8();
3118         break;
3119       case 14:
3120       case 13:
3121         i -= 7;
3122         addr_nop_7();
3123         break;
3124       case 12:
3125         i -= 6;
3126         emit_int8(0x66); // size prefix
3127         addr_nop_5();
3128         break;
3129       default:
3130         assert(i < 12, " ");
3131     }
3132 
3133     // Generate second nop for size between 11-1
3134     switch (i) {
3135       case 11:
3136         emit_int8(0x66); // size prefix
3137       case 10:
3138         emit_int8(0x66); // size prefix
3139       case 9:
3140         emit_int8(0x66); // size prefix
3141       case 8:
3142         addr_nop_8();
3143         break;
3144       case 7:
3145         addr_nop_7();
3146         break;
3147       case 6:
3148         emit_int8(0x66); // size prefix
3149       case 5:
3150         addr_nop_5();
3151         break;
3152       case 4:
3153         addr_nop_4();
3154         break;
3155       case 3:
3156         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3157         emit_int8(0x66); // size prefix
3158       case 2:
3159         emit_int8(0x66); // size prefix
3160       case 1:
3161         emit_int8((unsigned char)0x90);
3162                          // nop
3163         break;
3164       default:
3165         assert(i == 0, " ");
3166     }
3167     return;
3168   }
3169 
3170   // Using nops with size prefixes "0x66 0x90".
3171   // From AMD Optimization Guide:
3172   //  1: 0x90
3173   //  2: 0x66 0x90
3174   //  3: 0x66 0x66 0x90
3175   //  4: 0x66 0x66 0x66 0x90
3176   //  5: 0x66 0x66 0x90 0x66 0x90
3177   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3178   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3179   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3180   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3181   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3182   //
3183   while(i > 12) {
3184     i -= 4;
3185     emit_int8(0x66); // size prefix
3186     emit_int8(0x66);
3187     emit_int8(0x66);
3188     emit_int8((unsigned char)0x90);
3189                      // nop
3190   }
3191   // 1 - 12 nops
3192   if(i > 8) {
3193     if(i > 9) {
3194       i -= 1;
3195       emit_int8(0x66);
3196     }
3197     i -= 3;
3198     emit_int8(0x66);
3199     emit_int8(0x66);
3200     emit_int8((unsigned char)0x90);
3201   }
3202   // 1 - 8 nops
3203   if(i > 4) {
3204     if(i > 6) {
3205       i -= 1;
3206       emit_int8(0x66);
3207     }
3208     i -= 3;
3209     emit_int8(0x66);
3210     emit_int8(0x66);
3211     emit_int8((unsigned char)0x90);
3212   }
3213   switch (i) {
3214     case 4:
3215       emit_int8(0x66);
3216     case 3:
3217       emit_int8(0x66);
3218     case 2:
3219       emit_int8(0x66);
3220     case 1:
3221       emit_int8((unsigned char)0x90);
3222       break;
3223     default:
3224       assert(i == 0, " ");
3225   }
3226 }
3227 
3228 void Assembler::notl(Register dst) {
3229   int encode = prefix_and_encode(dst->encoding());
3230   emit_int8((unsigned char)0xF7);
3231   emit_int8((unsigned char)(0xD0 | encode));
3232 }
3233 
3234 void Assembler::orl(Address dst, int32_t imm32) {
3235   InstructionMark im(this);
3236   prefix(dst);
3237   emit_arith_operand(0x81, rcx, dst, imm32);
3238 }
3239 
3240 void Assembler::orl(Register dst, int32_t imm32) {
3241   prefix(dst);
3242   emit_arith(0x81, 0xC8, dst, imm32);
3243 }
3244 
3245 void Assembler::orl(Register dst, Address src) {
3246   InstructionMark im(this);
3247   prefix(src, dst);
3248   emit_int8(0x0B);
3249   emit_operand(dst, src);
3250 }
3251 
3252 void Assembler::orl(Register dst, Register src) {
3253   (void) prefix_and_encode(dst->encoding(), src->encoding());
3254   emit_arith(0x0B, 0xC0, dst, src);
3255 }
3256 
3257 void Assembler::orl(Address dst, Register src) {
3258   InstructionMark im(this);
3259   prefix(dst, src);
3260   emit_int8(0x09);
3261   emit_operand(src, dst);
3262 }
3263 
3264 void Assembler::packuswb(XMMRegister dst, Address src) {
3265   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3266   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3267   InstructionMark im(this);
3268   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3269   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3270   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3271   emit_int8(0x67);
3272   emit_operand(dst, src);
3273 }
3274 
3275 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
3276   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3277   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3278   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3279   emit_int8(0x67);
3280   emit_int8((unsigned char)(0xC0 | encode));
3281 }
3282 
3283 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3284   assert(UseAVX > 0, "some form of AVX must be enabled");
3285   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3286   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3287   emit_int8(0x67);
3288   emit_int8((unsigned char)(0xC0 | encode));
3289 }
3290 
3291 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3292   assert(VM_Version::supports_avx2(), "");
3293   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3294   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3295   emit_int8(0x00);
3296   emit_int8(0xC0 | encode);
3297   emit_int8(imm8);
3298 }
3299 
3300 void Assembler::vperm2i128(XMMRegister dst,  XMMRegister nds, XMMRegister src, int imm8) {
3301   assert(VM_Version::supports_avx2(), "");
3302   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3303   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3304   emit_int8(0x46);
3305   emit_int8(0xC0 | encode);
3306   emit_int8(imm8);
3307 }
3308 
3309 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3310   assert(VM_Version::supports_avx(), "");
3311   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3312   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3313   emit_int8(0x06);
3314   emit_int8(0xC0 | encode);
3315   emit_int8(imm8);
3316 }
3317 
3318 
3319 void Assembler::pause() {
3320   emit_int8((unsigned char)0xF3);
3321   emit_int8((unsigned char)0x90);
3322 }
3323 
3324 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3325   assert(VM_Version::supports_sse4_2(), "");
3326   InstructionMark im(this);
3327   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3328   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3329   emit_int8(0x61);
3330   emit_operand(dst, src);
3331   emit_int8(imm8);
3332 }
3333 
3334 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3335   assert(VM_Version::supports_sse4_2(), "");
3336   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3337   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3338   emit_int8(0x61);
3339   emit_int8((unsigned char)(0xC0 | encode));
3340   emit_int8(imm8);
3341 }
3342 
3343 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3344 void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
3345   assert(VM_Version::supports_sse2(), "");
3346   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3347   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3348   emit_int8(0x74);
3349   emit_int8((unsigned char)(0xC0 | encode));
3350 }
3351 
3352 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3353 void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3354   assert(VM_Version::supports_avx(), "");
3355   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3356   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3357   emit_int8(0x74);
3358   emit_int8((unsigned char)(0xC0 | encode));
3359 }
3360 
3361 // In this context, kdst is written the mask used to process the equal components
3362 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3363   assert(VM_Version::supports_avx512bw(), "");
3364   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3365   attributes.set_is_evex_instruction();
3366   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3367   emit_int8(0x74);
3368   emit_int8((unsigned char)(0xC0 | encode));
3369 }
3370 
3371 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3372   assert(VM_Version::supports_avx512vlbw(), "");
3373   InstructionMark im(this);
3374   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3375   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3376   attributes.set_is_evex_instruction();
3377   int dst_enc = kdst->encoding();
3378   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3379   emit_int8(0x64);
3380   emit_operand(as_Register(dst_enc), src);
3381 }
3382 
3383 void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3384   assert(is_vector_masking(), "");
3385   assert(VM_Version::supports_avx512vlbw(), "");
3386   InstructionMark im(this);
3387   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3388   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3389   attributes.reset_is_clear_context();
3390   attributes.set_embedded_opmask_register_specifier(mask);
3391   attributes.set_is_evex_instruction();
3392   int dst_enc = kdst->encoding();
3393   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3394   emit_int8(0x64);
3395   emit_operand(as_Register(dst_enc), src);
3396 }
3397 
3398 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3399   assert(VM_Version::supports_avx512vlbw(), "");
3400   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3401   attributes.set_is_evex_instruction();
3402   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3403   emit_int8(0x3E);
3404   emit_int8((unsigned char)(0xC0 | encode));
3405   emit_int8(vcc);
3406 }
3407 
3408 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3409   assert(is_vector_masking(), "");
3410   assert(VM_Version::supports_avx512vlbw(), "");
3411   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3412   attributes.reset_is_clear_context();
3413   attributes.set_embedded_opmask_register_specifier(mask);
3414   attributes.set_is_evex_instruction();
3415   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3416   emit_int8(0x3E);
3417   emit_int8((unsigned char)(0xC0 | encode));
3418   emit_int8(vcc);
3419 }
3420 
3421 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3422   assert(VM_Version::supports_avx512vlbw(), "");
3423   InstructionMark im(this);
3424   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3425   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3426   attributes.set_is_evex_instruction();
3427   int dst_enc = kdst->encoding();
3428   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3429   emit_int8(0x3E);
3430   emit_operand(as_Register(dst_enc), src);
3431   emit_int8(vcc);
3432 }
3433 
3434 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3435   assert(VM_Version::supports_avx512bw(), "");
3436   InstructionMark im(this);
3437   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3438   attributes.set_is_evex_instruction();
3439   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3440   int dst_enc = kdst->encoding();
3441   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3442   emit_int8(0x74);
3443   emit_operand(as_Register(dst_enc), src);
3444 }
3445 
3446 void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3447   assert(VM_Version::supports_avx512vlbw(), "");
3448   assert(is_vector_masking(), "");    // For stub code use only
3449   InstructionMark im(this);
3450   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false);
3451   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3452   attributes.reset_is_clear_context();
3453   attributes.set_embedded_opmask_register_specifier(mask);
3454   attributes.set_is_evex_instruction();
3455   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3456   emit_int8(0x74);
3457   emit_operand(as_Register(kdst->encoding()), src);
3458 }
3459 
3460 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3461 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3462   assert(VM_Version::supports_sse2(), "");
3463   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3464   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3465   emit_int8(0x75);
3466   emit_int8((unsigned char)(0xC0 | encode));
3467 }
3468 
3469 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3470 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3471   assert(VM_Version::supports_avx(), "");
3472   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3473   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3474   emit_int8(0x75);
3475   emit_int8((unsigned char)(0xC0 | encode));
3476 }
3477 
3478 // In this context, kdst is written the mask used to process the equal components
3479 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3480   assert(VM_Version::supports_avx512bw(), "");
3481   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3482   attributes.set_is_evex_instruction();
3483   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3484   emit_int8(0x75);
3485   emit_int8((unsigned char)(0xC0 | encode));
3486 }
3487 
3488 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3489   assert(VM_Version::supports_avx512bw(), "");
3490   InstructionMark im(this);
3491   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3492   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3493   attributes.set_is_evex_instruction();
3494   int dst_enc = kdst->encoding();
3495   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3496   emit_int8(0x75);
3497   emit_operand(as_Register(dst_enc), src);
3498 }
3499 
3500 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3501 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3502   assert(VM_Version::supports_sse2(), "");
3503   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3504   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3505   emit_int8(0x76);
3506   emit_int8((unsigned char)(0xC0 | encode));
3507 }
3508 
3509 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3510 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3511   assert(VM_Version::supports_avx(), "");
3512   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3513   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3514   emit_int8(0x76);
3515   emit_int8((unsigned char)(0xC0 | encode));
3516 }
3517 
3518 // In this context, kdst is written the mask used to process the equal components
3519 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3520   assert(VM_Version::supports_evex(), "");
3521   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3522   attributes.set_is_evex_instruction();
3523   attributes.reset_is_clear_context();
3524   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3525   emit_int8(0x76);
3526   emit_int8((unsigned char)(0xC0 | encode));
3527 }
3528 
3529 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3530   assert(VM_Version::supports_evex(), "");
3531   InstructionMark im(this);
3532   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3533   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3534   attributes.reset_is_clear_context();
3535   attributes.set_is_evex_instruction();
3536   int dst_enc = kdst->encoding();
3537   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3538   emit_int8(0x76);
3539   emit_operand(as_Register(dst_enc), src);
3540 }
3541 
3542 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3543 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3544   assert(VM_Version::supports_sse4_1(), "");
3545   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3546   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3547   emit_int8(0x29);
3548   emit_int8((unsigned char)(0xC0 | encode));
3549 }
3550 
3551 // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
3552 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3553   assert(VM_Version::supports_avx(), "");
3554   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3555   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3556   emit_int8(0x29);
3557   emit_int8((unsigned char)(0xC0 | encode));
3558 }
3559 
3560 // In this context, kdst is written the mask used to process the equal components
3561 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3562   assert(VM_Version::supports_evex(), "");
3563   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3564   attributes.reset_is_clear_context();
3565   attributes.set_is_evex_instruction();
3566   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3567   emit_int8(0x29);
3568   emit_int8((unsigned char)(0xC0 | encode));
3569 }
3570 
3571 // In this context, kdst is written the mask used to process the equal components
3572 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3573   assert(VM_Version::supports_evex(), "");
3574   InstructionMark im(this);
3575   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3576   attributes.reset_is_clear_context();
3577   attributes.set_is_evex_instruction();
3578   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
3579   int dst_enc = kdst->encoding();
3580   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3581   emit_int8(0x29);
3582   emit_operand(as_Register(dst_enc), src);
3583 }
3584 
3585 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3586   assert(VM_Version::supports_sse2(), "");
3587   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3588   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3589   emit_int8((unsigned char)0xD7);
3590   emit_int8((unsigned char)(0xC0 | encode));
3591 }
3592 
3593 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3594   assert(VM_Version::supports_avx2(), "");
3595   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3596   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3597   emit_int8((unsigned char)0xD7);
3598   emit_int8((unsigned char)(0xC0 | encode));
3599 }
3600 
3601 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3602   assert(VM_Version::supports_sse4_1(), "");
3603   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3604   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3605   emit_int8(0x16);
3606   emit_int8((unsigned char)(0xC0 | encode));
3607   emit_int8(imm8);
3608 }
3609 
3610 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
3611   assert(VM_Version::supports_sse4_1(), "");
3612   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3613   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3614   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3615   emit_int8(0x16);
3616   emit_operand(src, dst);
3617   emit_int8(imm8);
3618 }
3619 
3620 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3621   assert(VM_Version::supports_sse4_1(), "");
3622   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3623   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3624   emit_int8(0x16);
3625   emit_int8((unsigned char)(0xC0 | encode));
3626   emit_int8(imm8);
3627 }
3628 
3629 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
3630   assert(VM_Version::supports_sse4_1(), "");
3631   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3632   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3633   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3634   emit_int8(0x16);
3635   emit_operand(src, dst);
3636   emit_int8(imm8);
3637 }
3638 
3639 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3640   assert(VM_Version::supports_sse2(), "");
3641   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3642   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3643   emit_int8((unsigned char)0xC5);
3644   emit_int8((unsigned char)(0xC0 | encode));
3645   emit_int8(imm8);
3646 }
3647 
3648 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
3649   assert(VM_Version::supports_sse4_1(), "");
3650   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3651   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3652   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3653   emit_int8((unsigned char)0x15);
3654   emit_operand(src, dst);
3655   emit_int8(imm8);
3656 }
3657 
3658 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
3659   assert(VM_Version::supports_sse4_1(), "");
3660   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3661   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3662   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3663   emit_int8(0x14);
3664   emit_operand(src, dst);
3665   emit_int8(imm8);
3666 }
3667 
3668 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3669   assert(VM_Version::supports_sse4_1(), "");
3670   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3671   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3672   emit_int8(0x22);
3673   emit_int8((unsigned char)(0xC0 | encode));
3674   emit_int8(imm8);
3675 }
3676 
3677 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
3678   assert(VM_Version::supports_sse4_1(), "");
3679   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3680   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3681   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3682   emit_int8(0x22);
3683   emit_operand(dst,src);
3684   emit_int8(imm8);
3685 }
3686 
3687 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3688   assert(VM_Version::supports_sse4_1(), "");
3689   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3690   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3691   emit_int8(0x22);
3692   emit_int8((unsigned char)(0xC0 | encode));
3693   emit_int8(imm8);
3694 }
3695 
3696 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
3697   assert(VM_Version::supports_sse4_1(), "");
3698   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3699   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3700   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3701   emit_int8(0x22);
3702   emit_operand(dst, src);
3703   emit_int8(imm8);
3704 }
3705 
3706 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3707   assert(VM_Version::supports_sse2(), "");
3708   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3709   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3710   emit_int8((unsigned char)0xC4);
3711   emit_int8((unsigned char)(0xC0 | encode));
3712   emit_int8(imm8);
3713 }
3714 
3715 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
3716   assert(VM_Version::supports_sse2(), "");
3717   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3718   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3719   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3720   emit_int8((unsigned char)0xC4);
3721   emit_operand(dst, src);
3722   emit_int8(imm8);
3723 }
3724 
3725 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
3726   assert(VM_Version::supports_sse4_1(), "");
3727   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3728   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3729   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3730   emit_int8(0x20);
3731   emit_operand(dst, src);
3732   emit_int8(imm8);
3733 }
3734 
3735 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3736   assert(VM_Version::supports_sse4_1(), "");
3737   InstructionMark im(this);
3738   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3739   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3740   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3741   emit_int8(0x30);
3742   emit_operand(dst, src);
3743 }
3744 
3745 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3746   assert(VM_Version::supports_sse4_1(), "");
3747   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3748   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3749   emit_int8(0x30);
3750   emit_int8((unsigned char)(0xC0 | encode));
3751 }
3752 
3753 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3754   assert(VM_Version::supports_avx(), "");
3755   InstructionMark im(this);
3756   assert(dst != xnoreg, "sanity");
3757   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3758   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3759   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3760   emit_int8(0x30);
3761   emit_operand(dst, src);
3762 }
3763 
3764 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
3765   assert(is_vector_masking(), "");
3766   assert(VM_Version::supports_avx512vlbw(), "");
3767   assert(dst != xnoreg, "sanity");
3768   InstructionMark im(this);
3769   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3770   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3771   attributes.set_embedded_opmask_register_specifier(mask);
3772   attributes.set_is_evex_instruction();
3773   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3774   emit_int8(0x30);
3775   emit_operand(dst, src);
3776 }
3777 
3778 void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
3779   assert(VM_Version::supports_avx512vlbw(), "");
3780   assert(src != xnoreg, "sanity");
3781   InstructionMark im(this);
3782   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3783   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3784   attributes.set_is_evex_instruction();
3785   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3786   emit_int8(0x30);
3787   emit_operand(src, dst);
3788 }
3789 
3790 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
3791   assert(is_vector_masking(), "");
3792   assert(VM_Version::supports_avx512vlbw(), "");
3793   assert(src != xnoreg, "sanity");
3794   InstructionMark im(this);
3795   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3796   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3797   attributes.reset_is_clear_context();
3798   attributes.set_embedded_opmask_register_specifier(mask);
3799   attributes.set_is_evex_instruction();
3800   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3801   emit_int8(0x30);
3802   emit_operand(src, dst);
3803 }
3804 
3805 // generic
3806 void Assembler::pop(Register dst) {
3807   int encode = prefix_and_encode(dst->encoding());
3808   emit_int8(0x58 | encode);
3809 }
3810 
3811 void Assembler::popcntl(Register dst, Address src) {
3812   assert(VM_Version::supports_popcnt(), "must support");
3813   InstructionMark im(this);
3814   emit_int8((unsigned char)0xF3);
3815   prefix(src, dst);
3816   emit_int8(0x0F);
3817   emit_int8((unsigned char)0xB8);
3818   emit_operand(dst, src);
3819 }
3820 
3821 void Assembler::popcntl(Register dst, Register src) {
3822   assert(VM_Version::supports_popcnt(), "must support");
3823   emit_int8((unsigned char)0xF3);
3824   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3825   emit_int8(0x0F);
3826   emit_int8((unsigned char)0xB8);
3827   emit_int8((unsigned char)(0xC0 | encode));
3828 }
3829 
3830 void Assembler::popf() {
3831   emit_int8((unsigned char)0x9D);
3832 }
3833 
3834 #ifndef _LP64 // no 32bit push/pop on amd64
3835 void Assembler::popl(Address dst) {
3836   // NOTE: this will adjust stack by 8byte on 64bits
3837   InstructionMark im(this);
3838   prefix(dst);
3839   emit_int8((unsigned char)0x8F);
3840   emit_operand(rax, dst);
3841 }
3842 #endif
3843 
3844 void Assembler::prefetch_prefix(Address src) {
3845   prefix(src);
3846   emit_int8(0x0F);
3847 }
3848 
3849 void Assembler::prefetchnta(Address src) {
3850   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3851   InstructionMark im(this);
3852   prefetch_prefix(src);
3853   emit_int8(0x18);
3854   emit_operand(rax, src); // 0, src
3855 }
3856 
3857 void Assembler::prefetchr(Address src) {
3858   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3859   InstructionMark im(this);
3860   prefetch_prefix(src);
3861   emit_int8(0x0D);
3862   emit_operand(rax, src); // 0, src
3863 }
3864 
3865 void Assembler::prefetcht0(Address src) {
3866   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3867   InstructionMark im(this);
3868   prefetch_prefix(src);
3869   emit_int8(0x18);
3870   emit_operand(rcx, src); // 1, src
3871 }
3872 
3873 void Assembler::prefetcht1(Address src) {
3874   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3875   InstructionMark im(this);
3876   prefetch_prefix(src);
3877   emit_int8(0x18);
3878   emit_operand(rdx, src); // 2, src
3879 }
3880 
3881 void Assembler::prefetcht2(Address src) {
3882   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3883   InstructionMark im(this);
3884   prefetch_prefix(src);
3885   emit_int8(0x18);
3886   emit_operand(rbx, src); // 3, src
3887 }
3888 
3889 void Assembler::prefetchw(Address src) {
3890   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3891   InstructionMark im(this);
3892   prefetch_prefix(src);
3893   emit_int8(0x0D);
3894   emit_operand(rcx, src); // 1, src
3895 }
3896 
3897 void Assembler::prefix(Prefix p) {
3898   emit_int8(p);
3899 }
3900 
3901 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
3902   assert(VM_Version::supports_ssse3(), "");
3903   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3904   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3905   emit_int8(0x00);
3906   emit_int8((unsigned char)(0xC0 | encode));
3907 }
3908 
3909 void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3910   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
3911          vector_len == AVX_256bit? VM_Version::supports_avx2() :
3912          0, "");
3913   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3914   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3915   emit_int8(0x00);
3916   emit_int8((unsigned char)(0xC0 | encode));
3917 }
3918 
3919 void Assembler::pshufb(XMMRegister dst, Address src) {
3920   assert(VM_Version::supports_ssse3(), "");
3921   InstructionMark im(this);
3922   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3923   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3924   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3925   emit_int8(0x00);
3926   emit_operand(dst, src);
3927 }
3928 
3929 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
3930   assert(isByte(mode), "invalid value");
3931   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3932   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
3933   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3934   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3935   emit_int8(0x70);
3936   emit_int8((unsigned char)(0xC0 | encode));
3937   emit_int8(mode & 0xFF);
3938 }
3939 
3940 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
3941   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
3942          vector_len == AVX_256bit? VM_Version::supports_avx2() :
3943          0, "");
3944   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3945   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3946   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3947   emit_int8(0x70);
3948   emit_int8((unsigned char)(0xC0 | encode));
3949   emit_int8(mode & 0xFF);
3950 }
3951 
3952 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
3953   assert(isByte(mode), "invalid value");
3954   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3955   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3956   InstructionMark im(this);
3957   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3958   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3959   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3960   emit_int8(0x70);
3961   emit_operand(dst, src);
3962   emit_int8(mode & 0xFF);
3963 }
3964 
3965 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
3966   assert(isByte(mode), "invalid value");
3967   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3968   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3969   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3970   emit_int8(0x70);
3971   emit_int8((unsigned char)(0xC0 | encode));
3972   emit_int8(mode & 0xFF);
3973 }
3974 
3975 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
3976   assert(isByte(mode), "invalid value");
3977   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3978   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3979   InstructionMark im(this);
3980   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3981   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3982   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3983   emit_int8(0x70);
3984   emit_operand(dst, src);
3985   emit_int8(mode & 0xFF);
3986 }
3987 
3988 void Assembler::psrldq(XMMRegister dst, int shift) {
3989   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
3990   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3991   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3992   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3993   emit_int8(0x73);
3994   emit_int8((unsigned char)(0xC0 | encode));
3995   emit_int8(shift);
3996 }
3997 
3998 void Assembler::pslldq(XMMRegister dst, int shift) {
3999   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
4000   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4001   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4002   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
4003   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4004   emit_int8(0x73);
4005   emit_int8((unsigned char)(0xC0 | encode));
4006   emit_int8(shift);
4007 }
4008 
4009 void Assembler::ptest(XMMRegister dst, Address src) {
4010   assert(VM_Version::supports_sse4_1(), "");
4011   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4012   InstructionMark im(this);
4013   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4014   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4015   emit_int8(0x17);
4016   emit_operand(dst, src);
4017 }
4018 
4019 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
4020   assert(VM_Version::supports_sse4_1(), "");
4021   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4022   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4023   emit_int8(0x17);
4024   emit_int8((unsigned char)(0xC0 | encode));
4025 }
4026 
4027 void Assembler::vptest(XMMRegister dst, Address src) {
4028   assert(VM_Version::supports_avx(), "");
4029   InstructionMark im(this);
4030   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4031   assert(dst != xnoreg, "sanity");
4032   // swap src<->dst for encoding
4033   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4034   emit_int8(0x17);
4035   emit_operand(dst, src);
4036 }
4037 
4038 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
4039   assert(VM_Version::supports_avx(), "");
4040   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4041   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4042   emit_int8(0x17);
4043   emit_int8((unsigned char)(0xC0 | encode));
4044 }
4045 
4046 void Assembler::punpcklbw(XMMRegister dst, Address src) {
4047   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4048   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4049   InstructionMark im(this);
4050   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4051   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4052   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4053   emit_int8(0x60);
4054   emit_operand(dst, src);
4055 }
4056 
4057 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
4058   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4059   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4060   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4061   emit_int8(0x60);
4062   emit_int8((unsigned char)(0xC0 | encode));
4063 }
4064 
4065 void Assembler::punpckldq(XMMRegister dst, Address src) {
4066   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4067   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4068   InstructionMark im(this);
4069   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4070   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4071   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4072   emit_int8(0x62);
4073   emit_operand(dst, src);
4074 }
4075 
4076 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
4077   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4078   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4079   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4080   emit_int8(0x62);
4081   emit_int8((unsigned char)(0xC0 | encode));
4082 }
4083 
4084 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
4085   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4086   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4087   attributes.set_rex_vex_w_reverted();
4088   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4089   emit_int8(0x6C);
4090   emit_int8((unsigned char)(0xC0 | encode));
4091 }
4092 
4093 void Assembler::push(int32_t imm32) {
4094   // in 64bits we push 64bits onto the stack but only
4095   // take a 32bit immediate
4096   emit_int8(0x68);
4097   emit_int32(imm32);
4098 }
4099 
4100 void Assembler::push(Register src) {
4101   int encode = prefix_and_encode(src->encoding());
4102 
4103   emit_int8(0x50 | encode);
4104 }
4105 
4106 void Assembler::pushf() {
4107   emit_int8((unsigned char)0x9C);
4108 }
4109 
4110 #ifndef _LP64 // no 32bit push/pop on amd64
4111 void Assembler::pushl(Address src) {
4112   // Note this will push 64bit on 64bit
4113   InstructionMark im(this);
4114   prefix(src);
4115   emit_int8((unsigned char)0xFF);
4116   emit_operand(rsi, src);
4117 }
4118 #endif
4119 
4120 void Assembler::rcll(Register dst, int imm8) {
4121   assert(isShiftCount(imm8), "illegal shift count");
4122   int encode = prefix_and_encode(dst->encoding());
4123   if (imm8 == 1) {
4124     emit_int8((unsigned char)0xD1);
4125     emit_int8((unsigned char)(0xD0 | encode));
4126   } else {
4127     emit_int8((unsigned char)0xC1);
4128     emit_int8((unsigned char)0xD0 | encode);
4129     emit_int8(imm8);
4130   }
4131 }
4132 
4133 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
4134   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4135   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4136   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4137   emit_int8(0x53);
4138   emit_int8((unsigned char)(0xC0 | encode));
4139 }
4140 
4141 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
4142   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4143   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4144   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4145   emit_int8(0x53);
4146   emit_int8((unsigned char)(0xC0 | encode));
4147 }
4148 
4149 void Assembler::rdtsc() {
4150   emit_int8((unsigned char)0x0F);
4151   emit_int8((unsigned char)0x31);
4152 }
4153 
4154 // copies data from [esi] to [edi] using rcx pointer sized words
4155 // generic
4156 void Assembler::rep_mov() {
4157   emit_int8((unsigned char)0xF3);
4158   // MOVSQ
4159   LP64_ONLY(prefix(REX_W));
4160   emit_int8((unsigned char)0xA5);
4161 }
4162 
4163 // sets rcx bytes with rax, value at [edi]
4164 void Assembler::rep_stosb() {
4165   emit_int8((unsigned char)0xF3); // REP
4166   LP64_ONLY(prefix(REX_W));
4167   emit_int8((unsigned char)0xAA); // STOSB
4168 }
4169 
4170 // sets rcx pointer sized words with rax, value at [edi]
4171 // generic
4172 void Assembler::rep_stos() {
4173   emit_int8((unsigned char)0xF3); // REP
4174   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
4175   emit_int8((unsigned char)0xAB);
4176 }
4177 
4178 // scans rcx pointer sized words at [edi] for occurance of rax,
4179 // generic
4180 void Assembler::repne_scan() { // repne_scan
4181   emit_int8((unsigned char)0xF2);
4182   // SCASQ
4183   LP64_ONLY(prefix(REX_W));
4184   emit_int8((unsigned char)0xAF);
4185 }
4186 
4187 #ifdef _LP64
4188 // scans rcx 4 byte words at [edi] for occurance of rax,
4189 // generic
4190 void Assembler::repne_scanl() { // repne_scan
4191   emit_int8((unsigned char)0xF2);
4192   // SCASL
4193   emit_int8((unsigned char)0xAF);
4194 }
4195 #endif
4196 
4197 void Assembler::ret(int imm16) {
4198   if (imm16 == 0) {
4199     emit_int8((unsigned char)0xC3);
4200   } else {
4201     emit_int8((unsigned char)0xC2);
4202     emit_int16(imm16);
4203   }
4204 }
4205 
4206 void Assembler::sahf() {
4207 #ifdef _LP64
4208   // Not supported in 64bit mode
4209   ShouldNotReachHere();
4210 #endif
4211   emit_int8((unsigned char)0x9E);
4212 }
4213 
4214 void Assembler::sarl(Register dst, int imm8) {
4215   int encode = prefix_and_encode(dst->encoding());
4216   assert(isShiftCount(imm8), "illegal shift count");
4217   if (imm8 == 1) {
4218     emit_int8((unsigned char)0xD1);
4219     emit_int8((unsigned char)(0xF8 | encode));
4220   } else {
4221     emit_int8((unsigned char)0xC1);
4222     emit_int8((unsigned char)(0xF8 | encode));
4223     emit_int8(imm8);
4224   }
4225 }
4226 
4227 void Assembler::sarl(Register dst) {
4228   int encode = prefix_and_encode(dst->encoding());
4229   emit_int8((unsigned char)0xD3);
4230   emit_int8((unsigned char)(0xF8 | encode));
4231 }
4232 
4233 void Assembler::sbbl(Address dst, int32_t imm32) {
4234   InstructionMark im(this);
4235   prefix(dst);
4236   emit_arith_operand(0x81, rbx, dst, imm32);
4237 }
4238 
4239 void Assembler::sbbl(Register dst, int32_t imm32) {
4240   prefix(dst);
4241   emit_arith(0x81, 0xD8, dst, imm32);
4242 }
4243 
4244 
4245 void Assembler::sbbl(Register dst, Address src) {
4246   InstructionMark im(this);
4247   prefix(src, dst);
4248   emit_int8(0x1B);
4249   emit_operand(dst, src);
4250 }
4251 
4252 void Assembler::sbbl(Register dst, Register src) {
4253   (void) prefix_and_encode(dst->encoding(), src->encoding());
4254   emit_arith(0x1B, 0xC0, dst, src);
4255 }
4256 
4257 void Assembler::setb(Condition cc, Register dst) {
4258   assert(0 <= cc && cc < 16, "illegal cc");
4259   int encode = prefix_and_encode(dst->encoding(), true);
4260   emit_int8(0x0F);
4261   emit_int8((unsigned char)0x90 | cc);
4262   emit_int8((unsigned char)(0xC0 | encode));
4263 }
4264 
4265 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
4266   assert(VM_Version::supports_ssse3(), "");
4267   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
4268   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4269   emit_int8((unsigned char)0x0F);
4270   emit_int8((unsigned char)(0xC0 | encode));
4271   emit_int8(imm8);
4272 }
4273 
4274 void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4275   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4276          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4277          0, "");
4278   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
4279   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4280   emit_int8((unsigned char)0x0F);
4281   emit_int8((unsigned char)(0xC0 | encode));
4282   emit_int8(imm8);
4283 }
4284 
4285 void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
4286   assert(VM_Version::supports_sse4_1(), "");
4287   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4288   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4289   emit_int8((unsigned char)0x0E);
4290   emit_int8((unsigned char)(0xC0 | encode));
4291   emit_int8(imm8);
4292 }
4293 
4294 void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
4295   assert(VM_Version::supports_sha(), "");
4296   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
4297   emit_int8((unsigned char)0xCC);
4298   emit_int8((unsigned char)(0xC0 | encode));
4299   emit_int8((unsigned char)imm8);
4300 }
4301 
4302 void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
4303   assert(VM_Version::supports_sha(), "");
4304   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4305   emit_int8((unsigned char)0xC8);
4306   emit_int8((unsigned char)(0xC0 | encode));
4307 }
4308 
4309 void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
4310   assert(VM_Version::supports_sha(), "");
4311   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4312   emit_int8((unsigned char)0xC9);
4313   emit_int8((unsigned char)(0xC0 | encode));
4314 }
4315 
4316 void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
4317   assert(VM_Version::supports_sha(), "");
4318   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4319   emit_int8((unsigned char)0xCA);
4320   emit_int8((unsigned char)(0xC0 | encode));
4321 }
4322 
4323 // xmm0 is implicit additional source to this instruction.
4324 void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
4325   assert(VM_Version::supports_sha(), "");
4326   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4327   emit_int8((unsigned char)0xCB);
4328   emit_int8((unsigned char)(0xC0 | encode));
4329 }
4330 
4331 void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
4332   assert(VM_Version::supports_sha(), "");
4333   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4334   emit_int8((unsigned char)0xCC);
4335   emit_int8((unsigned char)(0xC0 | encode));
4336 }
4337 
4338 void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
4339   assert(VM_Version::supports_sha(), "");
4340   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4341   emit_int8((unsigned char)0xCD);
4342   emit_int8((unsigned char)(0xC0 | encode));
4343 }
4344 
4345 
4346 void Assembler::shll(Register dst, int imm8) {
4347   assert(isShiftCount(imm8), "illegal shift count");
4348   int encode = prefix_and_encode(dst->encoding());
4349   if (imm8 == 1 ) {
4350     emit_int8((unsigned char)0xD1);
4351     emit_int8((unsigned char)(0xE0 | encode));
4352   } else {
4353     emit_int8((unsigned char)0xC1);
4354     emit_int8((unsigned char)(0xE0 | encode));
4355     emit_int8(imm8);
4356   }
4357 }
4358 
4359 void Assembler::shll(Register dst) {
4360   int encode = prefix_and_encode(dst->encoding());
4361   emit_int8((unsigned char)0xD3);
4362   emit_int8((unsigned char)(0xE0 | encode));
4363 }
4364 
4365 void Assembler::shrl(Register dst, int imm8) {
4366   assert(isShiftCount(imm8), "illegal shift count");
4367   int encode = prefix_and_encode(dst->encoding());
4368   emit_int8((unsigned char)0xC1);
4369   emit_int8((unsigned char)(0xE8 | encode));
4370   emit_int8(imm8);
4371 }
4372 
4373 void Assembler::shrl(Register dst) {
4374   int encode = prefix_and_encode(dst->encoding());
4375   emit_int8((unsigned char)0xD3);
4376   emit_int8((unsigned char)(0xE8 | encode));
4377 }
4378 
4379 // copies a single word from [esi] to [edi]
4380 void Assembler::smovl() {
4381   emit_int8((unsigned char)0xA5);
4382 }
4383 
4384 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
4385   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4386   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4387   attributes.set_rex_vex_w_reverted();
4388   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4389   emit_int8(0x51);
4390   emit_int8((unsigned char)(0xC0 | encode));
4391 }
4392 
4393 void Assembler::sqrtsd(XMMRegister dst, Address src) {
4394   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4395   InstructionMark im(this);
4396   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4397   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4398   attributes.set_rex_vex_w_reverted();
4399   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4400   emit_int8(0x51);
4401   emit_operand(dst, src);
4402 }
4403 
4404 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
4405   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4406   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4407   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4408   emit_int8(0x51);
4409   emit_int8((unsigned char)(0xC0 | encode));
4410 }
4411 
4412 void Assembler::std() {
4413   emit_int8((unsigned char)0xFD);
4414 }
4415 
4416 void Assembler::sqrtss(XMMRegister dst, Address src) {
4417   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4418   InstructionMark im(this);
4419   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4420   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4421   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4422   emit_int8(0x51);
4423   emit_operand(dst, src);
4424 }
4425 
4426 void Assembler::stmxcsr( Address dst) {
4427   if (UseAVX > 0 ) {
4428     assert(VM_Version::supports_avx(), "");
4429     InstructionMark im(this);
4430     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4431     vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4432     emit_int8((unsigned char)0xAE);
4433     emit_operand(as_Register(3), dst);
4434   } else {
4435     NOT_LP64(assert(VM_Version::supports_sse(), ""));
4436     InstructionMark im(this);
4437     prefix(dst);
4438     emit_int8(0x0F);
4439     emit_int8((unsigned char)0xAE);
4440     emit_operand(as_Register(3), dst);
4441   }
4442 }
4443 
4444 void Assembler::subl(Address dst, int32_t imm32) {
4445   InstructionMark im(this);
4446   prefix(dst);
4447   emit_arith_operand(0x81, rbp, dst, imm32);
4448 }
4449 
4450 void Assembler::subl(Address dst, Register src) {
4451   InstructionMark im(this);
4452   prefix(dst, src);
4453   emit_int8(0x29);
4454   emit_operand(src, dst);
4455 }
4456 
4457 void Assembler::subl(Register dst, int32_t imm32) {
4458   prefix(dst);
4459   emit_arith(0x81, 0xE8, dst, imm32);
4460 }
4461 
4462 // Force generation of a 4 byte immediate value even if it fits into 8bit
4463 void Assembler::subl_imm32(Register dst, int32_t imm32) {
4464   prefix(dst);
4465   emit_arith_imm32(0x81, 0xE8, dst, imm32);
4466 }
4467 
4468 void Assembler::subl(Register dst, Address src) {
4469   InstructionMark im(this);
4470   prefix(src, dst);
4471   emit_int8(0x2B);
4472   emit_operand(dst, src);
4473 }
4474 
4475 void Assembler::subl(Register dst, Register src) {
4476   (void) prefix_and_encode(dst->encoding(), src->encoding());
4477   emit_arith(0x2B, 0xC0, dst, src);
4478 }
4479 
4480 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
4481   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4482   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4483   attributes.set_rex_vex_w_reverted();
4484   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4485   emit_int8(0x5C);
4486   emit_int8((unsigned char)(0xC0 | encode));
4487 }
4488 
4489 void Assembler::subsd(XMMRegister dst, Address src) {
4490   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4491   InstructionMark im(this);
4492   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4493   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4494   attributes.set_rex_vex_w_reverted();
4495   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4496   emit_int8(0x5C);
4497   emit_operand(dst, src);
4498 }
4499 
4500 void Assembler::subss(XMMRegister dst, XMMRegister src) {
4501   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4502   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ false);
4503   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4504   emit_int8(0x5C);
4505   emit_int8((unsigned char)(0xC0 | encode));
4506 }
4507 
4508 void Assembler::subss(XMMRegister dst, Address src) {
4509   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4510   InstructionMark im(this);
4511   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4512   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4513   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4514   emit_int8(0x5C);
4515   emit_operand(dst, src);
4516 }
4517 
4518 void Assembler::testb(Register dst, int imm8) {
4519   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
4520   (void) prefix_and_encode(dst->encoding(), true);
4521   emit_arith_b(0xF6, 0xC0, dst, imm8);
4522 }
4523 
4524 void Assembler::testb(Address dst, int imm8) {
4525   InstructionMark im(this);
4526   prefix(dst);
4527   emit_int8((unsigned char)0xF6);
4528   emit_operand(rax, dst, 1);
4529   emit_int8(imm8);
4530 }
4531 
4532 void Assembler::testl(Register dst, int32_t imm32) {
4533   // not using emit_arith because test
4534   // doesn't support sign-extension of
4535   // 8bit operands
4536   int encode = dst->encoding();
4537   if (encode == 0) {
4538     emit_int8((unsigned char)0xA9);
4539   } else {
4540     encode = prefix_and_encode(encode);
4541     emit_int8((unsigned char)0xF7);
4542     emit_int8((unsigned char)(0xC0 | encode));
4543   }
4544   emit_int32(imm32);
4545 }
4546 
4547 void Assembler::testl(Register dst, Register src) {
4548   (void) prefix_and_encode(dst->encoding(), src->encoding());
4549   emit_arith(0x85, 0xC0, dst, src);
4550 }
4551 
4552 void Assembler::testl(Register dst, Address src) {
4553   InstructionMark im(this);
4554   prefix(src, dst);
4555   emit_int8((unsigned char)0x85);
4556   emit_operand(dst, src);
4557 }
4558 
4559 void Assembler::tzcntl(Register dst, Register src) {
4560   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4561   emit_int8((unsigned char)0xF3);
4562   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4563   emit_int8(0x0F);
4564   emit_int8((unsigned char)0xBC);
4565   emit_int8((unsigned char)0xC0 | encode);
4566 }
4567 
4568 void Assembler::tzcntq(Register dst, Register src) {
4569   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4570   emit_int8((unsigned char)0xF3);
4571   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4572   emit_int8(0x0F);
4573   emit_int8((unsigned char)0xBC);
4574   emit_int8((unsigned char)(0xC0 | encode));
4575 }
4576 
4577 void Assembler::ucomisd(XMMRegister dst, Address src) {
4578   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4579   InstructionMark im(this);
4580   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4581   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4582   attributes.set_rex_vex_w_reverted();
4583   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4584   emit_int8(0x2E);
4585   emit_operand(dst, src);
4586 }
4587 
4588 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
4589   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4590   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4591   attributes.set_rex_vex_w_reverted();
4592   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4593   emit_int8(0x2E);
4594   emit_int8((unsigned char)(0xC0 | encode));
4595 }
4596 
4597 void Assembler::ucomiss(XMMRegister dst, Address src) {
4598   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4599   InstructionMark im(this);
4600   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4601   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4602   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4603   emit_int8(0x2E);
4604   emit_operand(dst, src);
4605 }
4606 
4607 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
4608   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4609   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4610   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4611   emit_int8(0x2E);
4612   emit_int8((unsigned char)(0xC0 | encode));
4613 }
4614 
4615 void Assembler::xabort(int8_t imm8) {
4616   emit_int8((unsigned char)0xC6);
4617   emit_int8((unsigned char)0xF8);
4618   emit_int8((unsigned char)(imm8 & 0xFF));
4619 }
4620 
4621 void Assembler::xaddb(Address dst, Register src) {
4622   InstructionMark im(this);
4623   prefix(dst, src, true);
4624   emit_int8(0x0F);
4625   emit_int8((unsigned char)0xC0);
4626   emit_operand(src, dst);
4627 }
4628 
4629 void Assembler::xaddw(Address dst, Register src) {
4630   InstructionMark im(this);
4631   emit_int8(0x66);
4632   prefix(dst, src);
4633   emit_int8(0x0F);
4634   emit_int8((unsigned char)0xC1);
4635   emit_operand(src, dst);
4636 }
4637 
4638 void Assembler::xaddl(Address dst, Register src) {
4639   InstructionMark im(this);
4640   prefix(dst, src);
4641   emit_int8(0x0F);
4642   emit_int8((unsigned char)0xC1);
4643   emit_operand(src, dst);
4644 }
4645 
4646 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
4647   InstructionMark im(this);
4648   relocate(rtype);
4649   if (abort.is_bound()) {
4650     address entry = target(abort);
4651     assert(entry != NULL, "abort entry NULL");
4652     intptr_t offset = entry - pc();
4653     emit_int8((unsigned char)0xC7);
4654     emit_int8((unsigned char)0xF8);
4655     emit_int32(offset - 6); // 2 opcode + 4 address
4656   } else {
4657     abort.add_patch_at(code(), locator());
4658     emit_int8((unsigned char)0xC7);
4659     emit_int8((unsigned char)0xF8);
4660     emit_int32(0);
4661   }
4662 }
4663 
4664 void Assembler::xchgb(Register dst, Address src) { // xchg
4665   InstructionMark im(this);
4666   prefix(src, dst, true);
4667   emit_int8((unsigned char)0x86);
4668   emit_operand(dst, src);
4669 }
4670 
4671 void Assembler::xchgw(Register dst, Address src) { // xchg
4672   InstructionMark im(this);
4673   emit_int8(0x66);
4674   prefix(src, dst);
4675   emit_int8((unsigned char)0x87);
4676   emit_operand(dst, src);
4677 }
4678 
4679 void Assembler::xchgl(Register dst, Address src) { // xchg
4680   InstructionMark im(this);
4681   prefix(src, dst);
4682   emit_int8((unsigned char)0x87);
4683   emit_operand(dst, src);
4684 }
4685 
4686 void Assembler::xchgl(Register dst, Register src) {
4687   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4688   emit_int8((unsigned char)0x87);
4689   emit_int8((unsigned char)(0xC0 | encode));
4690 }
4691 
4692 void Assembler::xend() {
4693   emit_int8((unsigned char)0x0F);
4694   emit_int8((unsigned char)0x01);
4695   emit_int8((unsigned char)0xD5);
4696 }
4697 
4698 void Assembler::xgetbv() {
4699   emit_int8(0x0F);
4700   emit_int8(0x01);
4701   emit_int8((unsigned char)0xD0);
4702 }
4703 
4704 void Assembler::xorl(Register dst, int32_t imm32) {
4705   prefix(dst);
4706   emit_arith(0x81, 0xF0, dst, imm32);
4707 }
4708 
4709 void Assembler::xorl(Register dst, Address src) {
4710   InstructionMark im(this);
4711   prefix(src, dst);
4712   emit_int8(0x33);
4713   emit_operand(dst, src);
4714 }
4715 
4716 void Assembler::xorl(Register dst, Register src) {
4717   (void) prefix_and_encode(dst->encoding(), src->encoding());
4718   emit_arith(0x33, 0xC0, dst, src);
4719 }
4720 
4721 void Assembler::xorb(Register dst, Address src) {
4722   InstructionMark im(this);
4723   prefix(src, dst);
4724   emit_int8(0x32);
4725   emit_operand(dst, src);
4726 }
4727 
4728 // AVX 3-operands scalar float-point arithmetic instructions
4729 
4730 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
4731   assert(VM_Version::supports_avx(), "");
4732   InstructionMark im(this);
4733   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4734   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4735   attributes.set_rex_vex_w_reverted();
4736   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4737   emit_int8(0x58);
4738   emit_operand(dst, src);
4739 }
4740 
4741 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4742   assert(VM_Version::supports_avx(), "");
4743   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4744   attributes.set_rex_vex_w_reverted();
4745   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4746   emit_int8(0x58);
4747   emit_int8((unsigned char)(0xC0 | encode));
4748 }
4749 
4750 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
4751   assert(VM_Version::supports_avx(), "");
4752   InstructionMark im(this);
4753   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4754   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4755   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4756   emit_int8(0x58);
4757   emit_operand(dst, src);
4758 }
4759 
4760 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4761   assert(VM_Version::supports_avx(), "");
4762   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4763   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4764   emit_int8(0x58);
4765   emit_int8((unsigned char)(0xC0 | encode));
4766 }
4767 
4768 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
4769   assert(VM_Version::supports_avx(), "");
4770   InstructionMark im(this);
4771   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4772   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4773   attributes.set_rex_vex_w_reverted();
4774   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4775   emit_int8(0x5E);
4776   emit_operand(dst, src);
4777 }
4778 
4779 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4780   assert(VM_Version::supports_avx(), "");
4781   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4782   attributes.set_rex_vex_w_reverted();
4783   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4784   emit_int8(0x5E);
4785   emit_int8((unsigned char)(0xC0 | encode));
4786 }
4787 
4788 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
4789   assert(VM_Version::supports_avx(), "");
4790   InstructionMark im(this);
4791   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4792   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4793   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4794   emit_int8(0x5E);
4795   emit_operand(dst, src);
4796 }
4797 
4798 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4799   assert(VM_Version::supports_avx(), "");
4800   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4801   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4802   emit_int8(0x5E);
4803   emit_int8((unsigned char)(0xC0 | encode));
4804 }
4805 
4806 void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4807   assert(VM_Version::supports_fma(), "");
4808   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4809   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4810   emit_int8((unsigned char)0xB9);
4811   emit_int8((unsigned char)(0xC0 | encode));
4812 }
4813 
4814 void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4815   assert(VM_Version::supports_fma(), "");
4816   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4817   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4818   emit_int8((unsigned char)0xB9);
4819   emit_int8((unsigned char)(0xC0 | encode));
4820 }
4821 
4822 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
4823   assert(VM_Version::supports_avx(), "");
4824   InstructionMark im(this);
4825   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4826   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4827   attributes.set_rex_vex_w_reverted();
4828   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4829   emit_int8(0x59);
4830   emit_operand(dst, src);
4831 }
4832 
4833 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4834   assert(VM_Version::supports_avx(), "");
4835   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4836   attributes.set_rex_vex_w_reverted();
4837   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4838   emit_int8(0x59);
4839   emit_int8((unsigned char)(0xC0 | encode));
4840 }
4841 
4842 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
4843   assert(VM_Version::supports_avx(), "");
4844   InstructionMark im(this);
4845   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4846   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4847   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4848   emit_int8(0x59);
4849   emit_operand(dst, src);
4850 }
4851 
4852 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4853   assert(VM_Version::supports_avx(), "");
4854   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4855   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4856   emit_int8(0x59);
4857   emit_int8((unsigned char)(0xC0 | encode));
4858 }
4859 
4860 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
4861   assert(VM_Version::supports_avx(), "");
4862   InstructionMark im(this);
4863   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4864   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4865   attributes.set_rex_vex_w_reverted();
4866   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4867   emit_int8(0x5C);
4868   emit_operand(dst, src);
4869 }
4870 
4871 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4872   assert(VM_Version::supports_avx(), "");
4873   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4874   attributes.set_rex_vex_w_reverted();
4875   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4876   emit_int8(0x5C);
4877   emit_int8((unsigned char)(0xC0 | encode));
4878 }
4879 
4880 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
4881   assert(VM_Version::supports_avx(), "");
4882   InstructionMark im(this);
4883   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4884   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4885   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4886   emit_int8(0x5C);
4887   emit_operand(dst, src);
4888 }
4889 
4890 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4891   assert(VM_Version::supports_avx(), "");
4892   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
4893   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4894   emit_int8(0x5C);
4895   emit_int8((unsigned char)(0xC0 | encode));
4896 }
4897 
4898 //====================VECTOR ARITHMETIC=====================================
4899 
4900 // Float-point vector arithmetic
4901 
4902 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
4903   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4904   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4905   attributes.set_rex_vex_w_reverted();
4906   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4907   emit_int8(0x58);
4908   emit_int8((unsigned char)(0xC0 | encode));
4909 }
4910 
4911 void Assembler::addpd(XMMRegister dst, Address src) {
4912   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4913   InstructionMark im(this);
4914   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4915   attributes.set_rex_vex_w_reverted();
4916   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
4917   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4918   emit_int8(0x58);
4919   emit_operand(dst, src);
4920 }
4921 
4922 
4923 void Assembler::addps(XMMRegister dst, XMMRegister src) {
4924   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4925   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4926   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4927   emit_int8(0x58);
4928   emit_int8((unsigned char)(0xC0 | encode));
4929 }
4930 
4931 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4932   assert(VM_Version::supports_avx(), "");
4933   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4934   attributes.set_rex_vex_w_reverted();
4935   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4936   emit_int8(0x58);
4937   emit_int8((unsigned char)(0xC0 | encode));
4938 }
4939 
4940 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4941   assert(VM_Version::supports_avx(), "");
4942   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4943   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4944   emit_int8(0x58);
4945   emit_int8((unsigned char)(0xC0 | encode));
4946 }
4947 
4948 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4949   assert(VM_Version::supports_avx(), "");
4950   InstructionMark im(this);
4951   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4952   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
4953   attributes.set_rex_vex_w_reverted();
4954   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4955   emit_int8(0x58);
4956   emit_operand(dst, src);
4957 }
4958 
4959 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4960   assert(VM_Version::supports_avx(), "");
4961   InstructionMark im(this);
4962   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4963   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4964   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4965   emit_int8(0x58);
4966   emit_operand(dst, src);
4967 }
4968 
4969 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
4970   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4971   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4972   attributes.set_rex_vex_w_reverted();
4973   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4974   emit_int8(0x5C);
4975   emit_int8((unsigned char)(0xC0 | encode));
4976 }
4977 
4978 void Assembler::subps(XMMRegister dst, XMMRegister src) {
4979   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4980   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4981   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4982   emit_int8(0x5C);
4983   emit_int8((unsigned char)(0xC0 | encode));
4984 }
4985 
4986 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4987   assert(VM_Version::supports_avx(), "");
4988   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4989   attributes.set_rex_vex_w_reverted();
4990   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4991   emit_int8(0x5C);
4992   emit_int8((unsigned char)(0xC0 | encode));
4993 }
4994 
4995 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4996   assert(VM_Version::supports_avx(), "");
4997   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4998   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4999   emit_int8(0x5C);
5000   emit_int8((unsigned char)(0xC0 | encode));
5001 }
5002 
5003 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5004   assert(VM_Version::supports_avx(), "");
5005   InstructionMark im(this);
5006   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5007   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5008   attributes.set_rex_vex_w_reverted();
5009   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5010   emit_int8(0x5C);
5011   emit_operand(dst, src);
5012 }
5013 
5014 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5015   assert(VM_Version::supports_avx(), "");
5016   InstructionMark im(this);
5017   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5018   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5019   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5020   emit_int8(0x5C);
5021   emit_operand(dst, src);
5022 }
5023 
5024 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
5025   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5026   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5027   attributes.set_rex_vex_w_reverted();
5028   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5029   emit_int8(0x59);
5030   emit_int8((unsigned char)(0xC0 | encode));
5031 }
5032 
5033 void Assembler::mulpd(XMMRegister dst, Address src) {
5034   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5035   InstructionMark im(this);
5036   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5037   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5038   attributes.set_rex_vex_w_reverted();
5039   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5040   emit_int8(0x59);
5041   emit_operand(dst, src);
5042 }
5043 
5044 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
5045   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5046   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5047   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5048   emit_int8(0x59);
5049   emit_int8((unsigned char)(0xC0 | encode));
5050 }
5051 
5052 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5053   assert(VM_Version::supports_avx(), "");
5054   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5055   attributes.set_rex_vex_w_reverted();
5056   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5057   emit_int8(0x59);
5058   emit_int8((unsigned char)(0xC0 | encode));
5059 }
5060 
5061 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5062   assert(VM_Version::supports_avx(), "");
5063   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5064   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5065   emit_int8(0x59);
5066   emit_int8((unsigned char)(0xC0 | encode));
5067 }
5068 
5069 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5070   assert(VM_Version::supports_avx(), "");
5071   InstructionMark im(this);
5072   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5073   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5074   attributes.set_rex_vex_w_reverted();
5075   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5076   emit_int8(0x59);
5077   emit_operand(dst, src);
5078 }
5079 
5080 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5081   assert(VM_Version::supports_avx(), "");
5082   InstructionMark im(this);
5083   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5084   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5085   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5086   emit_int8(0x59);
5087   emit_operand(dst, src);
5088 }
5089 
5090 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
5091   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5092   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5093   attributes.set_rex_vex_w_reverted();
5094   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5095   emit_int8(0x5E);
5096   emit_int8((unsigned char)(0xC0 | encode));
5097 }
5098 
5099 void Assembler::divps(XMMRegister dst, XMMRegister src) {
5100   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5101   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5102   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5103   emit_int8(0x5E);
5104   emit_int8((unsigned char)(0xC0 | encode));
5105 }
5106 
5107 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5108   assert(VM_Version::supports_avx(), "");
5109   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5110   attributes.set_rex_vex_w_reverted();
5111   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5112   emit_int8(0x5E);
5113   emit_int8((unsigned char)(0xC0 | encode));
5114 }
5115 
5116 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5117   assert(VM_Version::supports_avx(), "");
5118   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5119   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5120   emit_int8(0x5E);
5121   emit_int8((unsigned char)(0xC0 | encode));
5122 }
5123 
5124 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5125   assert(VM_Version::supports_avx(), "");
5126   InstructionMark im(this);
5127   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5128   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5129   attributes.set_rex_vex_w_reverted();
5130   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5131   emit_int8(0x5E);
5132   emit_operand(dst, src);
5133 }
5134 
5135 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5136   assert(VM_Version::supports_avx(), "");
5137   InstructionMark im(this);
5138   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5139   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5140   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5141   emit_int8(0x5E);
5142   emit_operand(dst, src);
5143 }
5144 
5145 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
5146   assert(VM_Version::supports_avx(), "");
5147   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5148   attributes.set_rex_vex_w_reverted();
5149   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5150   emit_int8(0x51);
5151   emit_int8((unsigned char)(0xC0 | encode));
5152 }
5153 
5154 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
5155   assert(VM_Version::supports_avx(), "");
5156   InstructionMark im(this);
5157   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5158   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5159   attributes.set_rex_vex_w_reverted();
5160   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5161   emit_int8(0x51);
5162   emit_operand(dst, src);
5163 }
5164 
5165 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
5166   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5167   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5168   attributes.set_rex_vex_w_reverted();
5169   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5170   emit_int8(0x54);
5171   emit_int8((unsigned char)(0xC0 | encode));
5172 }
5173 
5174 void Assembler::andps(XMMRegister dst, XMMRegister src) {
5175   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5176   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5177   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5178   emit_int8(0x54);
5179   emit_int8((unsigned char)(0xC0 | encode));
5180 }
5181 
5182 void Assembler::andps(XMMRegister dst, Address src) {
5183   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5184   InstructionMark im(this);
5185   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5186   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5187   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5188   emit_int8(0x54);
5189   emit_operand(dst, src);
5190 }
5191 
5192 void Assembler::andpd(XMMRegister dst, Address src) {
5193   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5194   InstructionMark im(this);
5195   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5196   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5197   attributes.set_rex_vex_w_reverted();
5198   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5199   emit_int8(0x54);
5200   emit_operand(dst, src);
5201 }
5202 
5203 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5204   assert(VM_Version::supports_avx(), "");
5205   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5206   attributes.set_rex_vex_w_reverted();
5207   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5208   emit_int8(0x54);
5209   emit_int8((unsigned char)(0xC0 | encode));
5210 }
5211 
5212 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5213   assert(VM_Version::supports_avx(), "");
5214   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5215   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5216   emit_int8(0x54);
5217   emit_int8((unsigned char)(0xC0 | encode));
5218 }
5219 
5220 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5221   assert(VM_Version::supports_avx(), "");
5222   InstructionMark im(this);
5223   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5224   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5225   attributes.set_rex_vex_w_reverted();
5226   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5227   emit_int8(0x54);
5228   emit_operand(dst, src);
5229 }
5230 
5231 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5232   assert(VM_Version::supports_avx(), "");
5233   InstructionMark im(this);
5234   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5235   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5236   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5237   emit_int8(0x54);
5238   emit_operand(dst, src);
5239 }
5240 
5241 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
5242   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5243   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5244   attributes.set_rex_vex_w_reverted();
5245   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5246   emit_int8(0x15);
5247   emit_int8((unsigned char)(0xC0 | encode));
5248 }
5249 
5250 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
5251   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5252   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5253   attributes.set_rex_vex_w_reverted();
5254   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5255   emit_int8(0x14);
5256   emit_int8((unsigned char)(0xC0 | encode));
5257 }
5258 
5259 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
5260   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5261   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5262   attributes.set_rex_vex_w_reverted();
5263   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5264   emit_int8(0x57);
5265   emit_int8((unsigned char)(0xC0 | encode));
5266 }
5267 
5268 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
5269   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5270   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5271   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5272   emit_int8(0x57);
5273   emit_int8((unsigned char)(0xC0 | encode));
5274 }
5275 
5276 void Assembler::xorpd(XMMRegister dst, Address src) {
5277   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5278   InstructionMark im(this);
5279   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5280   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5281   attributes.set_rex_vex_w_reverted();
5282   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5283   emit_int8(0x57);
5284   emit_operand(dst, src);
5285 }
5286 
5287 void Assembler::xorps(XMMRegister dst, Address src) {
5288   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5289   InstructionMark im(this);
5290   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5291   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5292   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5293   emit_int8(0x57);
5294   emit_operand(dst, src);
5295 }
5296 
5297 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5298   assert(VM_Version::supports_avx(), "");
5299   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5300   attributes.set_rex_vex_w_reverted();
5301   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5302   emit_int8(0x57);
5303   emit_int8((unsigned char)(0xC0 | encode));
5304 }
5305 
5306 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5307   assert(VM_Version::supports_avx(), "");
5308   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5309   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5310   emit_int8(0x57);
5311   emit_int8((unsigned char)(0xC0 | encode));
5312 }
5313 
5314 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5315   assert(VM_Version::supports_avx(), "");
5316   InstructionMark im(this);
5317   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5318   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5319   attributes.set_rex_vex_w_reverted();
5320   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5321   emit_int8(0x57);
5322   emit_operand(dst, src);
5323 }
5324 
5325 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5326   assert(VM_Version::supports_avx(), "");
5327   InstructionMark im(this);
5328   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5329   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5330   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5331   emit_int8(0x57);
5332   emit_operand(dst, src);
5333 }
5334 
5335 // Integer vector arithmetic
5336 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5337   assert(VM_Version::supports_avx() && (vector_len == 0) ||
5338          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
5339   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5340   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5341   emit_int8(0x01);
5342   emit_int8((unsigned char)(0xC0 | encode));
5343 }
5344 
5345 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5346   assert(VM_Version::supports_avx() && (vector_len == 0) ||
5347          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
5348   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5349   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5350   emit_int8(0x02);
5351   emit_int8((unsigned char)(0xC0 | encode));
5352 }
5353 
5354 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
5355   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5356   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5357   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5358   emit_int8((unsigned char)0xFC);
5359   emit_int8((unsigned char)(0xC0 | encode));
5360 }
5361 
5362 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
5363   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5364   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5365   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5366   emit_int8((unsigned char)0xFD);
5367   emit_int8((unsigned char)(0xC0 | encode));
5368 }
5369 
5370 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
5371   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5372   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5373   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5374   emit_int8((unsigned char)0xFE);
5375   emit_int8((unsigned char)(0xC0 | encode));
5376 }
5377 
5378 void Assembler::paddd(XMMRegister dst, Address src) {
5379   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5380   InstructionMark im(this);
5381   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5382   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5383   emit_int8((unsigned char)0xFE);
5384   emit_operand(dst, src);
5385 }
5386 
5387 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
5388   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5389   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5390   attributes.set_rex_vex_w_reverted();
5391   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5392   emit_int8((unsigned char)0xD4);
5393   emit_int8((unsigned char)(0xC0 | encode));
5394 }
5395 
5396 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
5397   assert(VM_Version::supports_sse3(), "");
5398   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5399   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5400   emit_int8(0x01);
5401   emit_int8((unsigned char)(0xC0 | encode));
5402 }
5403 
5404 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
5405   assert(VM_Version::supports_sse3(), "");
5406   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5407   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5408   emit_int8(0x02);
5409   emit_int8((unsigned char)(0xC0 | encode));
5410 }
5411 
5412 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5413   assert(UseAVX > 0, "requires some form of AVX");
5414   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5415   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5416   emit_int8((unsigned char)0xFC);
5417   emit_int8((unsigned char)(0xC0 | encode));
5418 }
5419 
5420 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5421   assert(UseAVX > 0, "requires some form of AVX");
5422   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5423   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5424   emit_int8((unsigned char)0xFD);
5425   emit_int8((unsigned char)(0xC0 | encode));
5426 }
5427 
5428 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5429   assert(UseAVX > 0, "requires some form of AVX");
5430   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5431   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5432   emit_int8((unsigned char)0xFE);
5433   emit_int8((unsigned char)(0xC0 | encode));
5434 }
5435 
5436 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5437   assert(UseAVX > 0, "requires some form of AVX");
5438   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5439   attributes.set_rex_vex_w_reverted();
5440   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5441   emit_int8((unsigned char)0xD4);
5442   emit_int8((unsigned char)(0xC0 | encode));
5443 }
5444 
5445 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5446   assert(UseAVX > 0, "requires some form of AVX");
5447   InstructionMark im(this);
5448   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5449   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5450   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5451   emit_int8((unsigned char)0xFC);
5452   emit_operand(dst, src);
5453 }
5454 
5455 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5456   assert(UseAVX > 0, "requires some form of AVX");
5457   InstructionMark im(this);
5458   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5459   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5460   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5461   emit_int8((unsigned char)0xFD);
5462   emit_operand(dst, src);
5463 }
5464 
5465 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5466   assert(UseAVX > 0, "requires some form of AVX");
5467   InstructionMark im(this);
5468   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5469   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5470   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5471   emit_int8((unsigned char)0xFE);
5472   emit_operand(dst, src);
5473 }
5474 
5475 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5476   assert(UseAVX > 0, "requires some form of AVX");
5477   InstructionMark im(this);
5478   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5479   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5480   attributes.set_rex_vex_w_reverted();
5481   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5482   emit_int8((unsigned char)0xD4);
5483   emit_operand(dst, src);
5484 }
5485 
5486 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
5487   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5488   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5489   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5490   emit_int8((unsigned char)0xF8);
5491   emit_int8((unsigned char)(0xC0 | encode));
5492 }
5493 
5494 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
5495   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5496   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5497   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5498   emit_int8((unsigned char)0xF9);
5499   emit_int8((unsigned char)(0xC0 | encode));
5500 }
5501 
5502 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
5503   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5504   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5505   emit_int8((unsigned char)0xFA);
5506   emit_int8((unsigned char)(0xC0 | encode));
5507 }
5508 
5509 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
5510   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5511   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5512   attributes.set_rex_vex_w_reverted();
5513   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5514   emit_int8((unsigned char)0xFB);
5515   emit_int8((unsigned char)(0xC0 | encode));
5516 }
5517 
5518 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5519   assert(UseAVX > 0, "requires some form of AVX");
5520   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5521   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5522   emit_int8((unsigned char)0xF8);
5523   emit_int8((unsigned char)(0xC0 | encode));
5524 }
5525 
5526 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5527   assert(UseAVX > 0, "requires some form of AVX");
5528   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5529   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5530   emit_int8((unsigned char)0xF9);
5531   emit_int8((unsigned char)(0xC0 | encode));
5532 }
5533 
5534 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5535   assert(UseAVX > 0, "requires some form of AVX");
5536   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5537   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5538   emit_int8((unsigned char)0xFA);
5539   emit_int8((unsigned char)(0xC0 | encode));
5540 }
5541 
5542 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5543   assert(UseAVX > 0, "requires some form of AVX");
5544   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5545   attributes.set_rex_vex_w_reverted();
5546   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5547   emit_int8((unsigned char)0xFB);
5548   emit_int8((unsigned char)(0xC0 | encode));
5549 }
5550 
5551 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5552   assert(UseAVX > 0, "requires some form of AVX");
5553   InstructionMark im(this);
5554   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5555   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5556   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5557   emit_int8((unsigned char)0xF8);
5558   emit_operand(dst, src);
5559 }
5560 
5561 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5562   assert(UseAVX > 0, "requires some form of AVX");
5563   InstructionMark im(this);
5564   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5565   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5566   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5567   emit_int8((unsigned char)0xF9);
5568   emit_operand(dst, src);
5569 }
5570 
5571 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5572   assert(UseAVX > 0, "requires some form of AVX");
5573   InstructionMark im(this);
5574   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5575   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5576   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5577   emit_int8((unsigned char)0xFA);
5578   emit_operand(dst, src);
5579 }
5580 
5581 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5582   assert(UseAVX > 0, "requires some form of AVX");
5583   InstructionMark im(this);
5584   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5585   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5586   attributes.set_rex_vex_w_reverted();
5587   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5588   emit_int8((unsigned char)0xFB);
5589   emit_operand(dst, src);
5590 }
5591 
5592 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
5593   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5594   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5595   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5596   emit_int8((unsigned char)0xD5);
5597   emit_int8((unsigned char)(0xC0 | encode));
5598 }
5599 
5600 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
5601   assert(VM_Version::supports_sse4_1(), "");
5602   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5603   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5604   emit_int8(0x40);
5605   emit_int8((unsigned char)(0xC0 | encode));
5606 }
5607 
5608 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5609   assert(UseAVX > 0, "requires some form of AVX");
5610   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5611   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5612   emit_int8((unsigned char)0xD5);
5613   emit_int8((unsigned char)(0xC0 | encode));
5614 }
5615 
5616 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5617   assert(UseAVX > 0, "requires some form of AVX");
5618   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5619   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5620   emit_int8(0x40);
5621   emit_int8((unsigned char)(0xC0 | encode));
5622 }
5623 
5624 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5625   assert(UseAVX > 2, "requires some form of EVEX");
5626   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5627   attributes.set_is_evex_instruction();
5628   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5629   emit_int8(0x40);
5630   emit_int8((unsigned char)(0xC0 | encode));
5631 }
5632 
5633 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5634   assert(UseAVX > 0, "requires some form of AVX");
5635   InstructionMark im(this);
5636   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5637   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5638   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5639   emit_int8((unsigned char)0xD5);
5640   emit_operand(dst, src);
5641 }
5642 
5643 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5644   assert(UseAVX > 0, "requires some form of AVX");
5645   InstructionMark im(this);
5646   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5647   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5648   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5649   emit_int8(0x40);
5650   emit_operand(dst, src);
5651 }
5652 
5653 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5654   assert(UseAVX > 2, "requires some form of EVEX");
5655   InstructionMark im(this);
5656   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5657   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5658   attributes.set_is_evex_instruction();
5659   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5660   emit_int8(0x40);
5661   emit_operand(dst, src);
5662 }
5663 
5664 // Shift packed integers left by specified number of bits.
5665 void Assembler::psllw(XMMRegister dst, int shift) {
5666   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5667   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5668   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5669   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5670   emit_int8(0x71);
5671   emit_int8((unsigned char)(0xC0 | encode));
5672   emit_int8(shift & 0xFF);
5673 }
5674 
5675 void Assembler::pslld(XMMRegister dst, int shift) {
5676   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5677   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5678   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5679   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5680   emit_int8(0x72);
5681   emit_int8((unsigned char)(0xC0 | encode));
5682   emit_int8(shift & 0xFF);
5683 }
5684 
5685 void Assembler::psllq(XMMRegister dst, int shift) {
5686   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5687   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5688   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5689   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5690   emit_int8(0x73);
5691   emit_int8((unsigned char)(0xC0 | encode));
5692   emit_int8(shift & 0xFF);
5693 }
5694 
5695 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
5696   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5697   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5698   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5699   emit_int8((unsigned char)0xF1);
5700   emit_int8((unsigned char)(0xC0 | encode));
5701 }
5702 
5703 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
5704   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5705   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5706   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5707   emit_int8((unsigned char)0xF2);
5708   emit_int8((unsigned char)(0xC0 | encode));
5709 }
5710 
5711 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
5712   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5713   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5714   attributes.set_rex_vex_w_reverted();
5715   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5716   emit_int8((unsigned char)0xF3);
5717   emit_int8((unsigned char)(0xC0 | encode));
5718 }
5719 
5720 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5721   assert(UseAVX > 0, "requires some form of AVX");
5722   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5723   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5724   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5725   emit_int8(0x71);
5726   emit_int8((unsigned char)(0xC0 | encode));
5727   emit_int8(shift & 0xFF);
5728 }
5729 
5730 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5731   assert(UseAVX > 0, "requires some form of AVX");
5732   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5733   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5734   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5735   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5736   emit_int8(0x72);
5737   emit_int8((unsigned char)(0xC0 | encode));
5738   emit_int8(shift & 0xFF);
5739 }
5740 
5741 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5742   assert(UseAVX > 0, "requires some form of AVX");
5743   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5744   attributes.set_rex_vex_w_reverted();
5745   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5746   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5747   emit_int8(0x73);
5748   emit_int8((unsigned char)(0xC0 | encode));
5749   emit_int8(shift & 0xFF);
5750 }
5751 
5752 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5753   assert(UseAVX > 0, "requires some form of AVX");
5754   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5755   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5756   emit_int8((unsigned char)0xF1);
5757   emit_int8((unsigned char)(0xC0 | encode));
5758 }
5759 
5760 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5761   assert(UseAVX > 0, "requires some form of AVX");
5762   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5763   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5764   emit_int8((unsigned char)0xF2);
5765   emit_int8((unsigned char)(0xC0 | encode));
5766 }
5767 
5768 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5769   assert(UseAVX > 0, "requires some form of AVX");
5770   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5771   attributes.set_rex_vex_w_reverted();
5772   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5773   emit_int8((unsigned char)0xF3);
5774   emit_int8((unsigned char)(0xC0 | encode));
5775 }
5776 
5777 // Shift packed integers logically right by specified number of bits.
5778 void Assembler::psrlw(XMMRegister dst, int shift) {
5779   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5780   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5781   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
5782   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5783   emit_int8(0x71);
5784   emit_int8((unsigned char)(0xC0 | encode));
5785   emit_int8(shift & 0xFF);
5786 }
5787 
5788 void Assembler::psrld(XMMRegister dst, int shift) {
5789   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5790   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5791   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
5792   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5793   emit_int8(0x72);
5794   emit_int8((unsigned char)(0xC0 | encode));
5795   emit_int8(shift & 0xFF);
5796 }
5797 
5798 void Assembler::psrlq(XMMRegister dst, int shift) {
5799   // Do not confuse it with psrldq SSE2 instruction which
5800   // shifts 128 bit value in xmm register by number of bytes.
5801   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5802   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5803   attributes.set_rex_vex_w_reverted();
5804   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
5805   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5806   emit_int8(0x73);
5807   emit_int8((unsigned char)(0xC0 | encode));
5808   emit_int8(shift & 0xFF);
5809 }
5810 
5811 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
5812   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5813   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5814   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5815   emit_int8((unsigned char)0xD1);
5816   emit_int8((unsigned char)(0xC0 | encode));
5817 }
5818 
5819 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
5820   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5821   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5822   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5823   emit_int8((unsigned char)0xD2);
5824   emit_int8((unsigned char)(0xC0 | encode));
5825 }
5826 
5827 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
5828   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5829   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5830   attributes.set_rex_vex_w_reverted();
5831   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5832   emit_int8((unsigned char)0xD3);
5833   emit_int8((unsigned char)(0xC0 | encode));
5834 }
5835 
5836 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5837   assert(UseAVX > 0, "requires some form of AVX");
5838   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5839   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
5840   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5841   emit_int8(0x71);
5842   emit_int8((unsigned char)(0xC0 | encode));
5843   emit_int8(shift & 0xFF);
5844 }
5845 
5846 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5847   assert(UseAVX > 0, "requires some form of AVX");
5848   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5849   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
5850   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5851   emit_int8(0x72);
5852   emit_int8((unsigned char)(0xC0 | encode));
5853   emit_int8(shift & 0xFF);
5854 }
5855 
5856 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5857   assert(UseAVX > 0, "requires some form of AVX");
5858   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5859   attributes.set_rex_vex_w_reverted();
5860   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
5861   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5862   emit_int8(0x73);
5863   emit_int8((unsigned char)(0xC0 | encode));
5864   emit_int8(shift & 0xFF);
5865 }
5866 
5867 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5868   assert(UseAVX > 0, "requires some form of AVX");
5869   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5870   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5871   emit_int8((unsigned char)0xD1);
5872   emit_int8((unsigned char)(0xC0 | encode));
5873 }
5874 
5875 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5876   assert(UseAVX > 0, "requires some form of AVX");
5877   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5878   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5879   emit_int8((unsigned char)0xD2);
5880   emit_int8((unsigned char)(0xC0 | encode));
5881 }
5882 
5883 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5884   assert(UseAVX > 0, "requires some form of AVX");
5885   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5886   attributes.set_rex_vex_w_reverted();
5887   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5888   emit_int8((unsigned char)0xD3);
5889   emit_int8((unsigned char)(0xC0 | encode));
5890 }
5891 
5892 // Shift packed integers arithmetically right by specified number of bits.
5893 void Assembler::psraw(XMMRegister dst, int shift) {
5894   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5895   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5896   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
5897   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5898   emit_int8(0x71);
5899   emit_int8((unsigned char)(0xC0 | encode));
5900   emit_int8(shift & 0xFF);
5901 }
5902 
5903 void Assembler::psrad(XMMRegister dst, int shift) {
5904   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5905   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5906   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
5907   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5908   emit_int8(0x72);
5909   emit_int8((unsigned char)(0xC0 | encode));
5910   emit_int8(shift & 0xFF);
5911 }
5912 
5913 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
5914   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5915   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5916   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5917   emit_int8((unsigned char)0xE1);
5918   emit_int8((unsigned char)(0xC0 | encode));
5919 }
5920 
5921 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
5922   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5923   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5924   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5925   emit_int8((unsigned char)0xE2);
5926   emit_int8((unsigned char)(0xC0 | encode));
5927 }
5928 
5929 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5930   assert(UseAVX > 0, "requires some form of AVX");
5931   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5932   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
5933   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5934   emit_int8(0x71);
5935   emit_int8((unsigned char)(0xC0 | encode));
5936   emit_int8(shift & 0xFF);
5937 }
5938 
5939 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5940   assert(UseAVX > 0, "requires some form of AVX");
5941   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5942   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
5943   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5944   emit_int8(0x72);
5945   emit_int8((unsigned char)(0xC0 | encode));
5946   emit_int8(shift & 0xFF);
5947 }
5948 
5949 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5950   assert(UseAVX > 0, "requires some form of AVX");
5951   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5952   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5953   emit_int8((unsigned char)0xE1);
5954   emit_int8((unsigned char)(0xC0 | encode));
5955 }
5956 
5957 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5958   assert(UseAVX > 0, "requires some form of AVX");
5959   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5960   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5961   emit_int8((unsigned char)0xE2);
5962   emit_int8((unsigned char)(0xC0 | encode));
5963 }
5964 
5965 
5966 // logical operations packed integers
5967 void Assembler::pand(XMMRegister dst, XMMRegister src) {
5968   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5969   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5970   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5971   emit_int8((unsigned char)0xDB);
5972   emit_int8((unsigned char)(0xC0 | encode));
5973 }
5974 
5975 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5976   assert(UseAVX > 0, "requires some form of AVX");
5977   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5978   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5979   emit_int8((unsigned char)0xDB);
5980   emit_int8((unsigned char)(0xC0 | encode));
5981 }
5982 
5983 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5984   assert(UseAVX > 0, "requires some form of AVX");
5985   InstructionMark im(this);
5986   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5987   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5988   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5989   emit_int8((unsigned char)0xDB);
5990   emit_operand(dst, src);
5991 }
5992 
5993 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
5994   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5995   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5996   attributes.set_rex_vex_w_reverted();
5997   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5998   emit_int8((unsigned char)0xDF);
5999   emit_int8((unsigned char)(0xC0 | encode));
6000 }
6001 
6002 void Assembler::por(XMMRegister dst, XMMRegister src) {
6003   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6004   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6005   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6006   emit_int8((unsigned char)0xEB);
6007   emit_int8((unsigned char)(0xC0 | encode));
6008 }
6009 
6010 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6011   assert(UseAVX > 0, "requires some form of AVX");
6012   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6013   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6014   emit_int8((unsigned char)0xEB);
6015   emit_int8((unsigned char)(0xC0 | encode));
6016 }
6017 
6018 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6019   assert(UseAVX > 0, "requires some form of AVX");
6020   InstructionMark im(this);
6021   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6022   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6023   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6024   emit_int8((unsigned char)0xEB);
6025   emit_operand(dst, src);
6026 }
6027 
6028 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
6029   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6030   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6031   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6032   emit_int8((unsigned char)0xEF);
6033   emit_int8((unsigned char)(0xC0 | encode));
6034 }
6035 
6036 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6037   assert(UseAVX > 0, "requires some form of AVX");
6038   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6039   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6040   emit_int8((unsigned char)0xEF);
6041   emit_int8((unsigned char)(0xC0 | encode));
6042 }
6043 
6044 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6045   assert(UseAVX > 0, "requires some form of AVX");
6046   InstructionMark im(this);
6047   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6048   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6049   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6050   emit_int8((unsigned char)0xEF);
6051   emit_operand(dst, src);
6052 }
6053 
6054 
6055 // vinserti forms
6056 
6057 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6058   assert(VM_Version::supports_avx2(), "");
6059   assert(imm8 <= 0x01, "imm8: %u", imm8);
6060   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6061   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6062   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6063   emit_int8(0x38);
6064   emit_int8((unsigned char)(0xC0 | encode));
6065   // 0x00 - insert into lower 128 bits
6066   // 0x01 - insert into upper 128 bits
6067   emit_int8(imm8 & 0x01);
6068 }
6069 
6070 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6071   assert(VM_Version::supports_avx2(), "");
6072   assert(dst != xnoreg, "sanity");
6073   assert(imm8 <= 0x01, "imm8: %u", imm8);
6074   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6075   InstructionMark im(this);
6076   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6077   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6078   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6079   emit_int8(0x38);
6080   emit_operand(dst, src);
6081   // 0x00 - insert into lower 128 bits
6082   // 0x01 - insert into upper 128 bits
6083   emit_int8(imm8 & 0x01);
6084 }
6085 
6086 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6087   assert(VM_Version::supports_evex(), "");
6088   assert(imm8 <= 0x03, "imm8: %u", imm8);
6089   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6090   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6091   emit_int8(0x38);
6092   emit_int8((unsigned char)(0xC0 | encode));
6093   // 0x00 - insert into q0 128 bits (0..127)
6094   // 0x01 - insert into q1 128 bits (128..255)
6095   // 0x02 - insert into q2 128 bits (256..383)
6096   // 0x03 - insert into q3 128 bits (384..511)
6097   emit_int8(imm8 & 0x03);
6098 }
6099 
6100 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6101   assert(VM_Version::supports_avx(), "");
6102   assert(dst != xnoreg, "sanity");
6103   assert(imm8 <= 0x03, "imm8: %u", imm8);
6104   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6105   InstructionMark im(this);
6106   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6107   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6108   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6109   emit_int8(0x18);
6110   emit_operand(dst, src);
6111   // 0x00 - insert into q0 128 bits (0..127)
6112   // 0x01 - insert into q1 128 bits (128..255)
6113   // 0x02 - insert into q2 128 bits (256..383)
6114   // 0x03 - insert into q3 128 bits (384..511)
6115   emit_int8(imm8 & 0x03);
6116 }
6117 
6118 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6119   assert(VM_Version::supports_evex(), "");
6120   assert(imm8 <= 0x01, "imm8: %u", imm8);
6121   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6122   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6123   emit_int8(0x38);
6124   emit_int8((unsigned char)(0xC0 | encode));
6125   // 0x00 - insert into lower 256 bits
6126   // 0x01 - insert into upper 256 bits
6127   emit_int8(imm8 & 0x01);
6128 }
6129 
6130 
6131 // vinsertf forms
6132 
6133 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6134   assert(VM_Version::supports_avx(), "");
6135   assert(imm8 <= 0x01, "imm8: %u", imm8);
6136   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6137   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6138   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6139   emit_int8(0x18);
6140   emit_int8((unsigned char)(0xC0 | encode));
6141   // 0x00 - insert into lower 128 bits
6142   // 0x01 - insert into upper 128 bits
6143   emit_int8(imm8 & 0x01);
6144 }
6145 
6146 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6147   assert(VM_Version::supports_avx(), "");
6148   assert(dst != xnoreg, "sanity");
6149   assert(imm8 <= 0x01, "imm8: %u", imm8);
6150   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6151   InstructionMark im(this);
6152   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6153   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6154   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6155   emit_int8(0x18);
6156   emit_operand(dst, src);
6157   // 0x00 - insert into lower 128 bits
6158   // 0x01 - insert into upper 128 bits
6159   emit_int8(imm8 & 0x01);
6160 }
6161 
6162 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6163   assert(VM_Version::supports_evex(), "");
6164   assert(imm8 <= 0x03, "imm8: %u", imm8);
6165   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6166   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6167   emit_int8(0x18);
6168   emit_int8((unsigned char)(0xC0 | encode));
6169   // 0x00 - insert into q0 128 bits (0..127)
6170   // 0x01 - insert into q1 128 bits (128..255)
6171   // 0x02 - insert into q2 128 bits (256..383)
6172   // 0x03 - insert into q3 128 bits (384..511)
6173   emit_int8(imm8 & 0x03);
6174 }
6175 
6176 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6177   assert(VM_Version::supports_avx(), "");
6178   assert(dst != xnoreg, "sanity");
6179   assert(imm8 <= 0x03, "imm8: %u", imm8);
6180   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6181   InstructionMark im(this);
6182   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6183   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6184   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6185   emit_int8(0x18);
6186   emit_operand(dst, src);
6187   // 0x00 - insert into q0 128 bits (0..127)
6188   // 0x01 - insert into q1 128 bits (128..255)
6189   // 0x02 - insert into q2 128 bits (256..383)
6190   // 0x03 - insert into q3 128 bits (384..511)
6191   emit_int8(imm8 & 0x03);
6192 }
6193 
6194 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6195   assert(VM_Version::supports_evex(), "");
6196   assert(imm8 <= 0x01, "imm8: %u", imm8);
6197   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6198   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6199   emit_int8(0x1A);
6200   emit_int8((unsigned char)(0xC0 | encode));
6201   // 0x00 - insert into lower 256 bits
6202   // 0x01 - insert into upper 256 bits
6203   emit_int8(imm8 & 0x01);
6204 }
6205 
6206 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6207   assert(VM_Version::supports_evex(), "");
6208   assert(dst != xnoreg, "sanity");
6209   assert(imm8 <= 0x01, "imm8: %u", imm8);
6210   InstructionMark im(this);
6211   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6212   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
6213   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6214   emit_int8(0x1A);
6215   emit_operand(dst, src);
6216   // 0x00 - insert into lower 256 bits
6217   // 0x01 - insert into upper 256 bits
6218   emit_int8(imm8 & 0x01);
6219 }
6220 
6221 
6222 // vextracti forms
6223 
6224 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6225   assert(VM_Version::supports_avx(), "");
6226   assert(imm8 <= 0x01, "imm8: %u", imm8);
6227   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6228   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6229   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6230   emit_int8(0x39);
6231   emit_int8((unsigned char)(0xC0 | encode));
6232   // 0x00 - extract from lower 128 bits
6233   // 0x01 - extract from upper 128 bits
6234   emit_int8(imm8 & 0x01);
6235 }
6236 
6237 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
6238   assert(VM_Version::supports_avx2(), "");
6239   assert(src != xnoreg, "sanity");
6240   assert(imm8 <= 0x01, "imm8: %u", imm8);
6241   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6242   InstructionMark im(this);
6243   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6244   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6245   attributes.reset_is_clear_context();
6246   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6247   emit_int8(0x39);
6248   emit_operand(src, dst);
6249   // 0x00 - extract from lower 128 bits
6250   // 0x01 - extract from upper 128 bits
6251   emit_int8(imm8 & 0x01);
6252 }
6253 
6254 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6255   assert(VM_Version::supports_avx(), "");
6256   assert(imm8 <= 0x03, "imm8: %u", imm8);
6257   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6258   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6259   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6260   emit_int8(0x39);
6261   emit_int8((unsigned char)(0xC0 | encode));
6262   // 0x00 - extract from bits 127:0
6263   // 0x01 - extract from bits 255:128
6264   // 0x02 - extract from bits 383:256
6265   // 0x03 - extract from bits 511:384
6266   emit_int8(imm8 & 0x03);
6267 }
6268 
6269 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
6270   assert(VM_Version::supports_evex(), "");
6271   assert(src != xnoreg, "sanity");
6272   assert(imm8 <= 0x03, "imm8: %u", imm8);
6273   InstructionMark im(this);
6274   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6275   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6276   attributes.reset_is_clear_context();
6277   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6278   emit_int8(0x39);
6279   emit_operand(src, dst);
6280   // 0x00 - extract from bits 127:0
6281   // 0x01 - extract from bits 255:128
6282   // 0x02 - extract from bits 383:256
6283   // 0x03 - extract from bits 511:384
6284   emit_int8(imm8 & 0x03);
6285 }
6286 
6287 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6288   assert(VM_Version::supports_avx512dq(), "");
6289   assert(imm8 <= 0x03, "imm8: %u", imm8);
6290   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6291   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6292   emit_int8(0x39);
6293   emit_int8((unsigned char)(0xC0 | encode));
6294   // 0x00 - extract from bits 127:0
6295   // 0x01 - extract from bits 255:128
6296   // 0x02 - extract from bits 383:256
6297   // 0x03 - extract from bits 511:384
6298   emit_int8(imm8 & 0x03);
6299 }
6300 
6301 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6302   assert(VM_Version::supports_evex(), "");
6303   assert(imm8 <= 0x01, "imm8: %u", imm8);
6304   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6305   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6306   emit_int8(0x3B);
6307   emit_int8((unsigned char)(0xC0 | encode));
6308   // 0x00 - extract from lower 256 bits
6309   // 0x01 - extract from upper 256 bits
6310   emit_int8(imm8 & 0x01);
6311 }
6312 
6313 
6314 // vextractf forms
6315 
6316 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6317   assert(VM_Version::supports_avx(), "");
6318   assert(imm8 <= 0x01, "imm8: %u", imm8);
6319   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6320   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6321   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6322   emit_int8(0x19);
6323   emit_int8((unsigned char)(0xC0 | encode));
6324   // 0x00 - extract from lower 128 bits
6325   // 0x01 - extract from upper 128 bits
6326   emit_int8(imm8 & 0x01);
6327 }
6328 
6329 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
6330   assert(VM_Version::supports_avx(), "");
6331   assert(src != xnoreg, "sanity");
6332   assert(imm8 <= 0x01, "imm8: %u", imm8);
6333   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6334   InstructionMark im(this);
6335   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6336   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6337   attributes.reset_is_clear_context();
6338   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6339   emit_int8(0x19);
6340   emit_operand(src, dst);
6341   // 0x00 - extract from lower 128 bits
6342   // 0x01 - extract from upper 128 bits
6343   emit_int8(imm8 & 0x01);
6344 }
6345 
6346 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6347   assert(VM_Version::supports_avx(), "");
6348   assert(imm8 <= 0x03, "imm8: %u", imm8);
6349   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6350   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6351   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6352   emit_int8(0x19);
6353   emit_int8((unsigned char)(0xC0 | encode));
6354   // 0x00 - extract from bits 127:0
6355   // 0x01 - extract from bits 255:128
6356   // 0x02 - extract from bits 383:256
6357   // 0x03 - extract from bits 511:384
6358   emit_int8(imm8 & 0x03);
6359 }
6360 
6361 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
6362   assert(VM_Version::supports_evex(), "");
6363   assert(src != xnoreg, "sanity");
6364   assert(imm8 <= 0x03, "imm8: %u", imm8);
6365   InstructionMark im(this);
6366   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6367   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6368   attributes.reset_is_clear_context();
6369   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6370   emit_int8(0x19);
6371   emit_operand(src, dst);
6372   // 0x00 - extract from bits 127:0
6373   // 0x01 - extract from bits 255:128
6374   // 0x02 - extract from bits 383:256
6375   // 0x03 - extract from bits 511:384
6376   emit_int8(imm8 & 0x03);
6377 }
6378 
6379 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6380   assert(VM_Version::supports_avx512dq(), "");
6381   assert(imm8 <= 0x03, "imm8: %u", imm8);
6382   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6383   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6384   emit_int8(0x19);
6385   emit_int8((unsigned char)(0xC0 | encode));
6386   // 0x00 - extract from bits 127:0
6387   // 0x01 - extract from bits 255:128
6388   // 0x02 - extract from bits 383:256
6389   // 0x03 - extract from bits 511:384
6390   emit_int8(imm8 & 0x03);
6391 }
6392 
6393 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6394   assert(VM_Version::supports_evex(), "");
6395   assert(imm8 <= 0x01, "imm8: %u", imm8);
6396   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6397   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6398   emit_int8(0x1B);
6399   emit_int8((unsigned char)(0xC0 | encode));
6400   // 0x00 - extract from lower 256 bits
6401   // 0x01 - extract from upper 256 bits
6402   emit_int8(imm8 & 0x01);
6403 }
6404 
6405 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
6406   assert(VM_Version::supports_evex(), "");
6407   assert(src != xnoreg, "sanity");
6408   assert(imm8 <= 0x01, "imm8: %u", imm8);
6409   InstructionMark im(this);
6410   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6411   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
6412   attributes.reset_is_clear_context();
6413   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6414   emit_int8(0x1B);
6415   emit_operand(src, dst);
6416   // 0x00 - extract from lower 256 bits
6417   // 0x01 - extract from upper 256 bits
6418   emit_int8(imm8 & 0x01);
6419 }
6420 
6421 
6422 // legacy word/dword replicate
6423 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
6424   assert(VM_Version::supports_avx2(), "");
6425   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6426   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6427   emit_int8(0x79);
6428   emit_int8((unsigned char)(0xC0 | encode));
6429 }
6430 
6431 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
6432   assert(VM_Version::supports_avx2(), "");
6433   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6434   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6435   emit_int8(0x58);
6436   emit_int8((unsigned char)(0xC0 | encode));
6437 }
6438 
6439 
6440 // xmm/mem sourced byte/word/dword/qword replicate
6441 
6442 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6443 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
6444   assert(VM_Version::supports_evex(), "");
6445   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6446   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6447   emit_int8(0x78);
6448   emit_int8((unsigned char)(0xC0 | encode));
6449 }
6450 
6451 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
6452   assert(VM_Version::supports_evex(), "");
6453   assert(dst != xnoreg, "sanity");
6454   InstructionMark im(this);
6455   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6456   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
6457   // swap src<->dst for encoding
6458   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6459   emit_int8(0x78);
6460   emit_operand(dst, src);
6461 }
6462 
6463 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6464 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
6465   assert(VM_Version::supports_evex(), "");
6466   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6467   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6468   emit_int8(0x79);
6469   emit_int8((unsigned char)(0xC0 | encode));
6470 }
6471 
6472 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
6473   assert(VM_Version::supports_evex(), "");
6474   assert(dst != xnoreg, "sanity");
6475   InstructionMark im(this);
6476   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6477   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
6478   // swap src<->dst for encoding
6479   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6480   emit_int8(0x79);
6481   emit_operand(dst, src);
6482 }
6483 
6484 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6485 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
6486   assert(VM_Version::supports_evex(), "");
6487   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6488   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6489   emit_int8(0x58);
6490   emit_int8((unsigned char)(0xC0 | encode));
6491 }
6492 
6493 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
6494   assert(VM_Version::supports_evex(), "");
6495   assert(dst != xnoreg, "sanity");
6496   InstructionMark im(this);
6497   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6498   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6499   // swap src<->dst for encoding
6500   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6501   emit_int8(0x58);
6502   emit_operand(dst, src);
6503 }
6504 
6505 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6506 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
6507   assert(VM_Version::supports_evex(), "");
6508   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6509   attributes.set_rex_vex_w_reverted();
6510   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6511   emit_int8(0x59);
6512   emit_int8((unsigned char)(0xC0 | encode));
6513 }
6514 
6515 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
6516   assert(VM_Version::supports_evex(), "");
6517   assert(dst != xnoreg, "sanity");
6518   InstructionMark im(this);
6519   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6520   attributes.set_rex_vex_w_reverted();
6521   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6522   // swap src<->dst for encoding
6523   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6524   emit_int8(0x59);
6525   emit_operand(dst, src);
6526 }
6527 
6528 
6529 // scalar single/double precision replicate
6530 
6531 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
6532 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
6533   assert(VM_Version::supports_evex(), "");
6534   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6535   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6536   emit_int8(0x18);
6537   emit_int8((unsigned char)(0xC0 | encode));
6538 }
6539 
6540 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
6541   assert(VM_Version::supports_evex(), "");
6542   assert(dst != xnoreg, "sanity");
6543   InstructionMark im(this);
6544   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6545   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6546   // swap src<->dst for encoding
6547   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6548   emit_int8(0x18);
6549   emit_operand(dst, src);
6550 }
6551 
6552 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
6553 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
6554   assert(VM_Version::supports_evex(), "");
6555   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6556   attributes.set_rex_vex_w_reverted();
6557   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6558   emit_int8(0x19);
6559   emit_int8((unsigned char)(0xC0 | encode));
6560 }
6561 
6562 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
6563   assert(VM_Version::supports_evex(), "");
6564   assert(dst != xnoreg, "sanity");
6565   InstructionMark im(this);
6566   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6567   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6568   attributes.set_rex_vex_w_reverted();
6569   // swap src<->dst for encoding
6570   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6571   emit_int8(0x19);
6572   emit_operand(dst, src);
6573 }
6574 
6575 
6576 // gpr source broadcast forms
6577 
6578 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6579 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
6580   assert(VM_Version::supports_evex(), "");
6581   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6582   attributes.set_is_evex_instruction();
6583   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6584   emit_int8(0x7A);
6585   emit_int8((unsigned char)(0xC0 | encode));
6586 }
6587 
6588 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6589 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
6590   assert(VM_Version::supports_evex(), "");
6591   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6592   attributes.set_is_evex_instruction();
6593   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6594   emit_int8(0x7B);
6595   emit_int8((unsigned char)(0xC0 | encode));
6596 }
6597 
6598 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6599 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
6600   assert(VM_Version::supports_evex(), "");
6601   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6602   attributes.set_is_evex_instruction();
6603   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6604   emit_int8(0x7C);
6605   emit_int8((unsigned char)(0xC0 | encode));
6606 }
6607 
6608 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6609 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
6610   assert(VM_Version::supports_evex(), "");
6611   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6612   attributes.set_is_evex_instruction();
6613   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6614   emit_int8(0x7C);
6615   emit_int8((unsigned char)(0xC0 | encode));
6616 }
6617 
6618 
6619 // Carry-Less Multiplication Quadword
6620 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
6621   assert(VM_Version::supports_clmul(), "");
6622   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6623   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6624   emit_int8(0x44);
6625   emit_int8((unsigned char)(0xC0 | encode));
6626   emit_int8((unsigned char)mask);
6627 }
6628 
6629 // Carry-Less Multiplication Quadword
6630 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
6631   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
6632   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6633   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6634   emit_int8(0x44);
6635   emit_int8((unsigned char)(0xC0 | encode));
6636   emit_int8((unsigned char)mask);
6637 }
6638 
6639 void Assembler::vzeroupper() {
6640   if (UseVzeroupper) {
6641     assert(VM_Version::supports_avx(), "");
6642     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6643     (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6644     emit_int8(0x77);
6645   }
6646 }
6647 
6648 #ifndef _LP64
6649 // 32bit only pieces of the assembler
6650 
6651 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
6652   // NO PREFIX AS NEVER 64BIT
6653   InstructionMark im(this);
6654   emit_int8((unsigned char)0x81);
6655   emit_int8((unsigned char)(0xF8 | src1->encoding()));
6656   emit_data(imm32, rspec, 0);
6657 }
6658 
6659 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
6660   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
6661   InstructionMark im(this);
6662   emit_int8((unsigned char)0x81);
6663   emit_operand(rdi, src1);
6664   emit_data(imm32, rspec, 0);
6665 }
6666 
6667 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
6668 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
6669 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
6670 void Assembler::cmpxchg8(Address adr) {
6671   InstructionMark im(this);
6672   emit_int8(0x0F);
6673   emit_int8((unsigned char)0xC7);
6674   emit_operand(rcx, adr);
6675 }
6676 
6677 void Assembler::decl(Register dst) {
6678   // Don't use it directly. Use MacroAssembler::decrementl() instead.
6679  emit_int8(0x48 | dst->encoding());
6680 }
6681 
6682 #endif // _LP64
6683 
6684 // 64bit typically doesn't use the x87 but needs to for the trig funcs
6685 
6686 void Assembler::fabs() {
6687   emit_int8((unsigned char)0xD9);
6688   emit_int8((unsigned char)0xE1);
6689 }
6690 
6691 void Assembler::fadd(int i) {
6692   emit_farith(0xD8, 0xC0, i);
6693 }
6694 
6695 void Assembler::fadd_d(Address src) {
6696   InstructionMark im(this);
6697   emit_int8((unsigned char)0xDC);
6698   emit_operand32(rax, src);
6699 }
6700 
6701 void Assembler::fadd_s(Address src) {
6702   InstructionMark im(this);
6703   emit_int8((unsigned char)0xD8);
6704   emit_operand32(rax, src);
6705 }
6706 
6707 void Assembler::fadda(int i) {
6708   emit_farith(0xDC, 0xC0, i);
6709 }
6710 
6711 void Assembler::faddp(int i) {
6712   emit_farith(0xDE, 0xC0, i);
6713 }
6714 
6715 void Assembler::fchs() {
6716   emit_int8((unsigned char)0xD9);
6717   emit_int8((unsigned char)0xE0);
6718 }
6719 
6720 void Assembler::fcom(int i) {
6721   emit_farith(0xD8, 0xD0, i);
6722 }
6723 
6724 void Assembler::fcomp(int i) {
6725   emit_farith(0xD8, 0xD8, i);
6726 }
6727 
6728 void Assembler::fcomp_d(Address src) {
6729   InstructionMark im(this);
6730   emit_int8((unsigned char)0xDC);
6731   emit_operand32(rbx, src);
6732 }
6733 
6734 void Assembler::fcomp_s(Address src) {
6735   InstructionMark im(this);
6736   emit_int8((unsigned char)0xD8);
6737   emit_operand32(rbx, src);
6738 }
6739 
6740 void Assembler::fcompp() {
6741   emit_int8((unsigned char)0xDE);
6742   emit_int8((unsigned char)0xD9);
6743 }
6744 
6745 void Assembler::fcos() {
6746   emit_int8((unsigned char)0xD9);
6747   emit_int8((unsigned char)0xFF);
6748 }
6749 
6750 void Assembler::fdecstp() {
6751   emit_int8((unsigned char)0xD9);
6752   emit_int8((unsigned char)0xF6);
6753 }
6754 
6755 void Assembler::fdiv(int i) {
6756   emit_farith(0xD8, 0xF0, i);
6757 }
6758 
6759 void Assembler::fdiv_d(Address src) {
6760   InstructionMark im(this);
6761   emit_int8((unsigned char)0xDC);
6762   emit_operand32(rsi, src);
6763 }
6764 
6765 void Assembler::fdiv_s(Address src) {
6766   InstructionMark im(this);
6767   emit_int8((unsigned char)0xD8);
6768   emit_operand32(rsi, src);
6769 }
6770 
6771 void Assembler::fdiva(int i) {
6772   emit_farith(0xDC, 0xF8, i);
6773 }
6774 
6775 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
6776 //       is erroneous for some of the floating-point instructions below.
6777 
6778 void Assembler::fdivp(int i) {
6779   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
6780 }
6781 
6782 void Assembler::fdivr(int i) {
6783   emit_farith(0xD8, 0xF8, i);
6784 }
6785 
6786 void Assembler::fdivr_d(Address src) {
6787   InstructionMark im(this);
6788   emit_int8((unsigned char)0xDC);
6789   emit_operand32(rdi, src);
6790 }
6791 
6792 void Assembler::fdivr_s(Address src) {
6793   InstructionMark im(this);
6794   emit_int8((unsigned char)0xD8);
6795   emit_operand32(rdi, src);
6796 }
6797 
6798 void Assembler::fdivra(int i) {
6799   emit_farith(0xDC, 0xF0, i);
6800 }
6801 
6802 void Assembler::fdivrp(int i) {
6803   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
6804 }
6805 
6806 void Assembler::ffree(int i) {
6807   emit_farith(0xDD, 0xC0, i);
6808 }
6809 
6810 void Assembler::fild_d(Address adr) {
6811   InstructionMark im(this);
6812   emit_int8((unsigned char)0xDF);
6813   emit_operand32(rbp, adr);
6814 }
6815 
6816 void Assembler::fild_s(Address adr) {
6817   InstructionMark im(this);
6818   emit_int8((unsigned char)0xDB);
6819   emit_operand32(rax, adr);
6820 }
6821 
6822 void Assembler::fincstp() {
6823   emit_int8((unsigned char)0xD9);
6824   emit_int8((unsigned char)0xF7);
6825 }
6826 
6827 void Assembler::finit() {
6828   emit_int8((unsigned char)0x9B);
6829   emit_int8((unsigned char)0xDB);
6830   emit_int8((unsigned char)0xE3);
6831 }
6832 
6833 void Assembler::fist_s(Address adr) {
6834   InstructionMark im(this);
6835   emit_int8((unsigned char)0xDB);
6836   emit_operand32(rdx, adr);
6837 }
6838 
6839 void Assembler::fistp_d(Address adr) {
6840   InstructionMark im(this);
6841   emit_int8((unsigned char)0xDF);
6842   emit_operand32(rdi, adr);
6843 }
6844 
6845 void Assembler::fistp_s(Address adr) {
6846   InstructionMark im(this);
6847   emit_int8((unsigned char)0xDB);
6848   emit_operand32(rbx, adr);
6849 }
6850 
6851 void Assembler::fld1() {
6852   emit_int8((unsigned char)0xD9);
6853   emit_int8((unsigned char)0xE8);
6854 }
6855 
6856 void Assembler::fld_d(Address adr) {
6857   InstructionMark im(this);
6858   emit_int8((unsigned char)0xDD);
6859   emit_operand32(rax, adr);
6860 }
6861 
6862 void Assembler::fld_s(Address adr) {
6863   InstructionMark im(this);
6864   emit_int8((unsigned char)0xD9);
6865   emit_operand32(rax, adr);
6866 }
6867 
6868 
6869 void Assembler::fld_s(int index) {
6870   emit_farith(0xD9, 0xC0, index);
6871 }
6872 
6873 void Assembler::fld_x(Address adr) {
6874   InstructionMark im(this);
6875   emit_int8((unsigned char)0xDB);
6876   emit_operand32(rbp, adr);
6877 }
6878 
6879 void Assembler::fldcw(Address src) {
6880   InstructionMark im(this);
6881   emit_int8((unsigned char)0xD9);
6882   emit_operand32(rbp, src);
6883 }
6884 
6885 void Assembler::fldenv(Address src) {
6886   InstructionMark im(this);
6887   emit_int8((unsigned char)0xD9);
6888   emit_operand32(rsp, src);
6889 }
6890 
6891 void Assembler::fldlg2() {
6892   emit_int8((unsigned char)0xD9);
6893   emit_int8((unsigned char)0xEC);
6894 }
6895 
6896 void Assembler::fldln2() {
6897   emit_int8((unsigned char)0xD9);
6898   emit_int8((unsigned char)0xED);
6899 }
6900 
6901 void Assembler::fldz() {
6902   emit_int8((unsigned char)0xD9);
6903   emit_int8((unsigned char)0xEE);
6904 }
6905 
6906 void Assembler::flog() {
6907   fldln2();
6908   fxch();
6909   fyl2x();
6910 }
6911 
6912 void Assembler::flog10() {
6913   fldlg2();
6914   fxch();
6915   fyl2x();
6916 }
6917 
6918 void Assembler::fmul(int i) {
6919   emit_farith(0xD8, 0xC8, i);
6920 }
6921 
6922 void Assembler::fmul_d(Address src) {
6923   InstructionMark im(this);
6924   emit_int8((unsigned char)0xDC);
6925   emit_operand32(rcx, src);
6926 }
6927 
6928 void Assembler::fmul_s(Address src) {
6929   InstructionMark im(this);
6930   emit_int8((unsigned char)0xD8);
6931   emit_operand32(rcx, src);
6932 }
6933 
6934 void Assembler::fmula(int i) {
6935   emit_farith(0xDC, 0xC8, i);
6936 }
6937 
6938 void Assembler::fmulp(int i) {
6939   emit_farith(0xDE, 0xC8, i);
6940 }
6941 
6942 void Assembler::fnsave(Address dst) {
6943   InstructionMark im(this);
6944   emit_int8((unsigned char)0xDD);
6945   emit_operand32(rsi, dst);
6946 }
6947 
6948 void Assembler::fnstcw(Address src) {
6949   InstructionMark im(this);
6950   emit_int8((unsigned char)0x9B);
6951   emit_int8((unsigned char)0xD9);
6952   emit_operand32(rdi, src);
6953 }
6954 
6955 void Assembler::fnstsw_ax() {
6956   emit_int8((unsigned char)0xDF);
6957   emit_int8((unsigned char)0xE0);
6958 }
6959 
6960 void Assembler::fprem() {
6961   emit_int8((unsigned char)0xD9);
6962   emit_int8((unsigned char)0xF8);
6963 }
6964 
6965 void Assembler::fprem1() {
6966   emit_int8((unsigned char)0xD9);
6967   emit_int8((unsigned char)0xF5);
6968 }
6969 
6970 void Assembler::frstor(Address src) {
6971   InstructionMark im(this);
6972   emit_int8((unsigned char)0xDD);
6973   emit_operand32(rsp, src);
6974 }
6975 
6976 void Assembler::fsin() {
6977   emit_int8((unsigned char)0xD9);
6978   emit_int8((unsigned char)0xFE);
6979 }
6980 
6981 void Assembler::fsqrt() {
6982   emit_int8((unsigned char)0xD9);
6983   emit_int8((unsigned char)0xFA);
6984 }
6985 
6986 void Assembler::fst_d(Address adr) {
6987   InstructionMark im(this);
6988   emit_int8((unsigned char)0xDD);
6989   emit_operand32(rdx, adr);
6990 }
6991 
6992 void Assembler::fst_s(Address adr) {
6993   InstructionMark im(this);
6994   emit_int8((unsigned char)0xD9);
6995   emit_operand32(rdx, adr);
6996 }
6997 
6998 void Assembler::fstp_d(Address adr) {
6999   InstructionMark im(this);
7000   emit_int8((unsigned char)0xDD);
7001   emit_operand32(rbx, adr);
7002 }
7003 
7004 void Assembler::fstp_d(int index) {
7005   emit_farith(0xDD, 0xD8, index);
7006 }
7007 
7008 void Assembler::fstp_s(Address adr) {
7009   InstructionMark im(this);
7010   emit_int8((unsigned char)0xD9);
7011   emit_operand32(rbx, adr);
7012 }
7013 
7014 void Assembler::fstp_x(Address adr) {
7015   InstructionMark im(this);
7016   emit_int8((unsigned char)0xDB);
7017   emit_operand32(rdi, adr);
7018 }
7019 
7020 void Assembler::fsub(int i) {
7021   emit_farith(0xD8, 0xE0, i);
7022 }
7023 
7024 void Assembler::fsub_d(Address src) {
7025   InstructionMark im(this);
7026   emit_int8((unsigned char)0xDC);
7027   emit_operand32(rsp, src);
7028 }
7029 
7030 void Assembler::fsub_s(Address src) {
7031   InstructionMark im(this);
7032   emit_int8((unsigned char)0xD8);
7033   emit_operand32(rsp, src);
7034 }
7035 
7036 void Assembler::fsuba(int i) {
7037   emit_farith(0xDC, 0xE8, i);
7038 }
7039 
7040 void Assembler::fsubp(int i) {
7041   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
7042 }
7043 
7044 void Assembler::fsubr(int i) {
7045   emit_farith(0xD8, 0xE8, i);
7046 }
7047 
7048 void Assembler::fsubr_d(Address src) {
7049   InstructionMark im(this);
7050   emit_int8((unsigned char)0xDC);
7051   emit_operand32(rbp, src);
7052 }
7053 
7054 void Assembler::fsubr_s(Address src) {
7055   InstructionMark im(this);
7056   emit_int8((unsigned char)0xD8);
7057   emit_operand32(rbp, src);
7058 }
7059 
7060 void Assembler::fsubra(int i) {
7061   emit_farith(0xDC, 0xE0, i);
7062 }
7063 
7064 void Assembler::fsubrp(int i) {
7065   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
7066 }
7067 
7068 void Assembler::ftan() {
7069   emit_int8((unsigned char)0xD9);
7070   emit_int8((unsigned char)0xF2);
7071   emit_int8((unsigned char)0xDD);
7072   emit_int8((unsigned char)0xD8);
7073 }
7074 
7075 void Assembler::ftst() {
7076   emit_int8((unsigned char)0xD9);
7077   emit_int8((unsigned char)0xE4);
7078 }
7079 
7080 void Assembler::fucomi(int i) {
7081   // make sure the instruction is supported (introduced for P6, together with cmov)
7082   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7083   emit_farith(0xDB, 0xE8, i);
7084 }
7085 
7086 void Assembler::fucomip(int i) {
7087   // make sure the instruction is supported (introduced for P6, together with cmov)
7088   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7089   emit_farith(0xDF, 0xE8, i);
7090 }
7091 
7092 void Assembler::fwait() {
7093   emit_int8((unsigned char)0x9B);
7094 }
7095 
7096 void Assembler::fxch(int i) {
7097   emit_farith(0xD9, 0xC8, i);
7098 }
7099 
7100 void Assembler::fyl2x() {
7101   emit_int8((unsigned char)0xD9);
7102   emit_int8((unsigned char)0xF1);
7103 }
7104 
7105 void Assembler::frndint() {
7106   emit_int8((unsigned char)0xD9);
7107   emit_int8((unsigned char)0xFC);
7108 }
7109 
7110 void Assembler::f2xm1() {
7111   emit_int8((unsigned char)0xD9);
7112   emit_int8((unsigned char)0xF0);
7113 }
7114 
7115 void Assembler::fldl2e() {
7116   emit_int8((unsigned char)0xD9);
7117   emit_int8((unsigned char)0xEA);
7118 }
7119 
7120 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
7121 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
7122 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
7123 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
7124 
7125 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
7126 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7127   if (pre > 0) {
7128     emit_int8(simd_pre[pre]);
7129   }
7130   if (rex_w) {
7131     prefixq(adr, xreg);
7132   } else {
7133     prefix(adr, xreg);
7134   }
7135   if (opc > 0) {
7136     emit_int8(0x0F);
7137     int opc2 = simd_opc[opc];
7138     if (opc2 > 0) {
7139       emit_int8(opc2);
7140     }
7141   }
7142 }
7143 
7144 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7145   if (pre > 0) {
7146     emit_int8(simd_pre[pre]);
7147   }
7148   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
7149   if (opc > 0) {
7150     emit_int8(0x0F);
7151     int opc2 = simd_opc[opc];
7152     if (opc2 > 0) {
7153       emit_int8(opc2);
7154     }
7155   }
7156   return encode;
7157 }
7158 
7159 
7160 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
7161   int vector_len = _attributes->get_vector_len();
7162   bool vex_w = _attributes->is_rex_vex_w();
7163   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
7164     prefix(VEX_3bytes);
7165 
7166     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
7167     byte1 = (~byte1) & 0xE0;
7168     byte1 |= opc;
7169     emit_int8(byte1);
7170 
7171     int byte2 = ((~nds_enc) & 0xf) << 3;
7172     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
7173     emit_int8(byte2);
7174   } else {
7175     prefix(VEX_2bytes);
7176 
7177     int byte1 = vex_r ? VEX_R : 0;
7178     byte1 = (~byte1) & 0x80;
7179     byte1 |= ((~nds_enc) & 0xf) << 3;
7180     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
7181     emit_int8(byte1);
7182   }
7183 }
7184 
7185 // This is a 4 byte encoding
7186 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
7187   // EVEX 0x62 prefix
7188   prefix(EVEX_4bytes);
7189   bool vex_w = _attributes->is_rex_vex_w();
7190   int evex_encoding = (vex_w ? VEX_W : 0);
7191   // EVEX.b is not currently used for broadcast of single element or data rounding modes
7192   _attributes->set_evex_encoding(evex_encoding);
7193 
7194   // P0: byte 2, initialized to RXBR`00mm
7195   // instead of not'd
7196   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
7197   byte2 = (~byte2) & 0xF0;
7198   // confine opc opcode extensions in mm bits to lower two bits
7199   // of form {0F, 0F_38, 0F_3A}
7200   byte2 |= opc;
7201   emit_int8(byte2);
7202 
7203   // P1: byte 3 as Wvvvv1pp
7204   int byte3 = ((~nds_enc) & 0xf) << 3;
7205   // p[10] is always 1
7206   byte3 |= EVEX_F;
7207   byte3 |= (vex_w & 1) << 7;
7208   // confine pre opcode extensions in pp bits to lower two bits
7209   // of form {66, F3, F2}
7210   byte3 |= pre;
7211   emit_int8(byte3);
7212 
7213   // P2: byte 4 as zL'Lbv'aaa
7214   // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
7215   int byte4 = (_attributes->is_no_reg_mask()) ?
7216               0 :
7217               _attributes->get_embedded_opmask_register_specifier();
7218   // EVEX.v` for extending EVEX.vvvv or VIDX
7219   byte4 |= (evex_v ? 0: EVEX_V);
7220   // third EXEC.b for broadcast actions
7221   byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
7222   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
7223   byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
7224   // last is EVEX.z for zero/merge actions
7225   if (_attributes->is_no_reg_mask() == false) {
7226     byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
7227   }
7228   emit_int8(byte4);
7229 }
7230 
7231 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7232   bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
7233   bool vex_b = adr.base_needs_rex();
7234   bool vex_x = adr.index_needs_rex();
7235   set_attributes(attributes);
7236   attributes->set_current_assembler(this);
7237 
7238   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7239   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7240     switch (attributes->get_vector_len()) {
7241     case AVX_128bit:
7242     case AVX_256bit:
7243       attributes->set_is_legacy_mode();
7244       break;
7245     }
7246   }
7247 
7248   // For pure EVEX check and see if this instruction
7249   // is allowed in legacy mode and has resources which will
7250   // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
7251   // else that field is set when we encode to EVEX
7252   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7253       !_is_managed && !attributes->is_evex_instruction()) {
7254     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7255       bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7256       if (check_register_bank) {
7257         // check nds_enc and xreg_enc for upper bank usage
7258         if (nds_enc < 16 && xreg_enc < 16) {
7259           attributes->set_is_legacy_mode();
7260         }
7261       } else {
7262         attributes->set_is_legacy_mode();
7263       }
7264     }
7265   }
7266 
7267   _is_managed = false;
7268   if (UseAVX > 2 && !attributes->is_legacy_mode())
7269   {
7270     bool evex_r = (xreg_enc >= 16);
7271     bool evex_v = (nds_enc >= 16);
7272     attributes->set_is_evex_instruction();
7273     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7274   } else {
7275     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7276       attributes->set_rex_vex_w(false);
7277     }
7278     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7279   }
7280 }
7281 
7282 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7283   bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
7284   bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
7285   bool vex_x = false;
7286   set_attributes(attributes);
7287   attributes->set_current_assembler(this);
7288   bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7289 
7290   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7291   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7292     switch (attributes->get_vector_len()) {
7293     case AVX_128bit:
7294     case AVX_256bit:
7295       if (check_register_bank) {
7296         if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
7297           // up propagate arithmetic instructions to meet RA requirements
7298           attributes->set_vector_len(AVX_512bit);
7299         } else {
7300           attributes->set_is_legacy_mode();
7301         }
7302       } else {
7303         attributes->set_is_legacy_mode();
7304       }
7305       break;
7306     }
7307   }
7308 
7309   // For pure EVEX check and see if this instruction
7310   // is allowed in legacy mode and has resources which will
7311   // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
7312   // else that field is set when we encode to EVEX
7313   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7314       !_is_managed && !attributes->is_evex_instruction()) {
7315     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7316       if (check_register_bank) {
7317         // check dst_enc, nds_enc and src_enc for upper bank usage
7318         if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
7319           attributes->set_is_legacy_mode();
7320         }
7321       } else {
7322         attributes->set_is_legacy_mode();
7323       }
7324     }
7325   }
7326 
7327   _is_managed = false;
7328   if (UseAVX > 2 && !attributes->is_legacy_mode())
7329   {
7330     bool evex_r = (dst_enc >= 16);
7331     bool evex_v = (nds_enc >= 16);
7332     // can use vex_x as bank extender on rm encoding
7333     vex_x = (src_enc >= 16);
7334     attributes->set_is_evex_instruction();
7335     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7336   } else {
7337     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7338       attributes->set_rex_vex_w(false);
7339     }
7340     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7341   }
7342 
7343   // return modrm byte components for operands
7344   return (((dst_enc & 7) << 3) | (src_enc & 7));
7345 }
7346 
7347 
7348 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
7349                             VexOpcode opc, InstructionAttr *attributes) {
7350   if (UseAVX > 0) {
7351     int xreg_enc = xreg->encoding();
7352     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7353     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
7354   } else {
7355     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
7356     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
7357   }
7358 }
7359 
7360 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
7361                                       VexOpcode opc, InstructionAttr *attributes) {
7362   int dst_enc = dst->encoding();
7363   int src_enc = src->encoding();
7364   if (UseAVX > 0) {
7365     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7366     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
7367   } else {
7368     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
7369     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
7370   }
7371 }
7372 
7373 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7374   assert(VM_Version::supports_avx(), "");
7375   assert(!VM_Version::supports_evex(), "");
7376   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7377   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7378   emit_int8((unsigned char)0xC2);
7379   emit_int8((unsigned char)(0xC0 | encode));
7380   emit_int8((unsigned char)(0xF & cop));
7381 }
7382 
7383 void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7384   assert(VM_Version::supports_avx(), "");
7385   assert(!VM_Version::supports_evex(), "");
7386   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7387   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7388   emit_int8((unsigned char)0x4B);
7389   emit_int8((unsigned char)(0xC0 | encode));
7390   int src2_enc = src2->encoding();
7391   emit_int8((unsigned char)(0xF0 & src2_enc<<4));
7392 }
7393 
7394 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
7395   assert(VM_Version::supports_avx2(), "");
7396   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7397   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7398   emit_int8((unsigned char)0x02);
7399   emit_int8((unsigned char)(0xC0 | encode));
7400   emit_int8((unsigned char)imm8);
7401 }
7402 
7403 void Assembler::shlxl(Register dst, Register src1, Register src2) {
7404   assert(VM_Version::supports_bmi2(), "");
7405   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7406   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7407   emit_int8((unsigned char)0xF7);
7408   emit_int8((unsigned char)(0xC0 | encode));
7409 }
7410 
7411 void Assembler::shlxq(Register dst, Register src1, Register src2) {
7412   assert(VM_Version::supports_bmi2(), "");
7413   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7414   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7415   emit_int8((unsigned char)0xF7);
7416   emit_int8((unsigned char)(0xC0 | encode));
7417 }
7418 
7419 #ifndef _LP64
7420 
7421 void Assembler::incl(Register dst) {
7422   // Don't use it directly. Use MacroAssembler::incrementl() instead.
7423   emit_int8(0x40 | dst->encoding());
7424 }
7425 
7426 void Assembler::lea(Register dst, Address src) {
7427   leal(dst, src);
7428 }
7429 
7430 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
7431   InstructionMark im(this);
7432   emit_int8((unsigned char)0xC7);
7433   emit_operand(rax, dst);
7434   emit_data((int)imm32, rspec, 0);
7435 }
7436 
7437 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7438   InstructionMark im(this);
7439   int encode = prefix_and_encode(dst->encoding());
7440   emit_int8((unsigned char)(0xB8 | encode));
7441   emit_data((int)imm32, rspec, 0);
7442 }
7443 
7444 void Assembler::popa() { // 32bit
7445   emit_int8(0x61);
7446 }
7447 
7448 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
7449   InstructionMark im(this);
7450   emit_int8(0x68);
7451   emit_data(imm32, rspec, 0);
7452 }
7453 
7454 void Assembler::pusha() { // 32bit
7455   emit_int8(0x60);
7456 }
7457 
7458 void Assembler::set_byte_if_not_zero(Register dst) {
7459   emit_int8(0x0F);
7460   emit_int8((unsigned char)0x95);
7461   emit_int8((unsigned char)(0xE0 | dst->encoding()));
7462 }
7463 
7464 void Assembler::shldl(Register dst, Register src) {
7465   emit_int8(0x0F);
7466   emit_int8((unsigned char)0xA5);
7467   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7468 }
7469 
7470 // 0F A4 / r ib
7471 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
7472   emit_int8(0x0F);
7473   emit_int8((unsigned char)0xA4);
7474   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7475   emit_int8(imm8);
7476 }
7477 
7478 void Assembler::shrdl(Register dst, Register src) {
7479   emit_int8(0x0F);
7480   emit_int8((unsigned char)0xAD);
7481   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7482 }
7483 
7484 #else // LP64
7485 
7486 void Assembler::set_byte_if_not_zero(Register dst) {
7487   int enc = prefix_and_encode(dst->encoding(), true);
7488   emit_int8(0x0F);
7489   emit_int8((unsigned char)0x95);
7490   emit_int8((unsigned char)(0xE0 | enc));
7491 }
7492 
7493 // 64bit only pieces of the assembler
7494 // This should only be used by 64bit instructions that can use rip-relative
7495 // it cannot be used by instructions that want an immediate value.
7496 
7497 bool Assembler::reachable(AddressLiteral adr) {
7498   int64_t disp;
7499   // None will force a 64bit literal to the code stream. Likely a placeholder
7500   // for something that will be patched later and we need to certain it will
7501   // always be reachable.
7502   if (adr.reloc() == relocInfo::none) {
7503     return false;
7504   }
7505   if (adr.reloc() == relocInfo::internal_word_type) {
7506     // This should be rip relative and easily reachable.
7507     return true;
7508   }
7509   if (adr.reloc() == relocInfo::virtual_call_type ||
7510       adr.reloc() == relocInfo::opt_virtual_call_type ||
7511       adr.reloc() == relocInfo::static_call_type ||
7512       adr.reloc() == relocInfo::static_stub_type ) {
7513     // This should be rip relative within the code cache and easily
7514     // reachable until we get huge code caches. (At which point
7515     // ic code is going to have issues).
7516     return true;
7517   }
7518   if (adr.reloc() != relocInfo::external_word_type &&
7519       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
7520       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
7521       adr.reloc() != relocInfo::runtime_call_type ) {
7522     return false;
7523   }
7524 
7525   // Stress the correction code
7526   if (ForceUnreachable) {
7527     // Must be runtimecall reloc, see if it is in the codecache
7528     // Flipping stuff in the codecache to be unreachable causes issues
7529     // with things like inline caches where the additional instructions
7530     // are not handled.
7531     if (CodeCache::find_blob(adr._target) == NULL) {
7532       return false;
7533     }
7534   }
7535   // For external_word_type/runtime_call_type if it is reachable from where we
7536   // are now (possibly a temp buffer) and where we might end up
7537   // anywhere in the codeCache then we are always reachable.
7538   // This would have to change if we ever save/restore shared code
7539   // to be more pessimistic.
7540   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
7541   if (!is_simm32(disp)) return false;
7542   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
7543   if (!is_simm32(disp)) return false;
7544 
7545   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
7546 
7547   // Because rip relative is a disp + address_of_next_instruction and we
7548   // don't know the value of address_of_next_instruction we apply a fudge factor
7549   // to make sure we will be ok no matter the size of the instruction we get placed into.
7550   // We don't have to fudge the checks above here because they are already worst case.
7551 
7552   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
7553   // + 4 because better safe than sorry.
7554   const int fudge = 12 + 4;
7555   if (disp < 0) {
7556     disp -= fudge;
7557   } else {
7558     disp += fudge;
7559   }
7560   return is_simm32(disp);
7561 }
7562 
7563 // Check if the polling page is not reachable from the code cache using rip-relative
7564 // addressing.
7565 bool Assembler::is_polling_page_far() {
7566   intptr_t addr = (intptr_t)os::get_polling_page();
7567   return ForceUnreachable ||
7568          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
7569          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
7570 }
7571 
7572 void Assembler::emit_data64(jlong data,
7573                             relocInfo::relocType rtype,
7574                             int format) {
7575   if (rtype == relocInfo::none) {
7576     emit_int64(data);
7577   } else {
7578     emit_data64(data, Relocation::spec_simple(rtype), format);
7579   }
7580 }
7581 
7582 void Assembler::emit_data64(jlong data,
7583                             RelocationHolder const& rspec,
7584                             int format) {
7585   assert(imm_operand == 0, "default format must be immediate in this file");
7586   assert(imm_operand == format, "must be immediate");
7587   assert(inst_mark() != NULL, "must be inside InstructionMark");
7588   // Do not use AbstractAssembler::relocate, which is not intended for
7589   // embedded words.  Instead, relocate to the enclosing instruction.
7590   code_section()->relocate(inst_mark(), rspec, format);
7591 #ifdef ASSERT
7592   check_relocation(rspec, format);
7593 #endif
7594   emit_int64(data);
7595 }
7596 
7597 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
7598   if (reg_enc >= 8) {
7599     prefix(REX_B);
7600     reg_enc -= 8;
7601   } else if (byteinst && reg_enc >= 4) {
7602     prefix(REX);
7603   }
7604   return reg_enc;
7605 }
7606 
7607 int Assembler::prefixq_and_encode(int reg_enc) {
7608   if (reg_enc < 8) {
7609     prefix(REX_W);
7610   } else {
7611     prefix(REX_WB);
7612     reg_enc -= 8;
7613   }
7614   return reg_enc;
7615 }
7616 
7617 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
7618   if (dst_enc < 8) {
7619     if (src_enc >= 8) {
7620       prefix(REX_B);
7621       src_enc -= 8;
7622     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
7623       prefix(REX);
7624     }
7625   } else {
7626     if (src_enc < 8) {
7627       prefix(REX_R);
7628     } else {
7629       prefix(REX_RB);
7630       src_enc -= 8;
7631     }
7632     dst_enc -= 8;
7633   }
7634   return dst_enc << 3 | src_enc;
7635 }
7636 
7637 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
7638   if (dst_enc < 8) {
7639     if (src_enc < 8) {
7640       prefix(REX_W);
7641     } else {
7642       prefix(REX_WB);
7643       src_enc -= 8;
7644     }
7645   } else {
7646     if (src_enc < 8) {
7647       prefix(REX_WR);
7648     } else {
7649       prefix(REX_WRB);
7650       src_enc -= 8;
7651     }
7652     dst_enc -= 8;
7653   }
7654   return dst_enc << 3 | src_enc;
7655 }
7656 
7657 void Assembler::prefix(Register reg) {
7658   if (reg->encoding() >= 8) {
7659     prefix(REX_B);
7660   }
7661 }
7662 
7663 void Assembler::prefix(Register dst, Register src, Prefix p) {
7664   if (src->encoding() >= 8) {
7665     p = (Prefix)(p | REX_B);
7666   }
7667   if (dst->encoding() >= 8) {
7668     p = (Prefix)( p | REX_R);
7669   }
7670   if (p != Prefix_EMPTY) {
7671     // do not generate an empty prefix
7672     prefix(p);
7673   }
7674 }
7675 
7676 void Assembler::prefix(Register dst, Address adr, Prefix p) {
7677   if (adr.base_needs_rex()) {
7678     if (adr.index_needs_rex()) {
7679       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7680     } else {
7681       prefix(REX_B);
7682     }
7683   } else {
7684     if (adr.index_needs_rex()) {
7685       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7686     }
7687   }
7688   if (dst->encoding() >= 8) {
7689     p = (Prefix)(p | REX_R);
7690   }
7691   if (p != Prefix_EMPTY) {
7692     // do not generate an empty prefix
7693     prefix(p);
7694   }
7695 }
7696 
7697 void Assembler::prefix(Address adr) {
7698   if (adr.base_needs_rex()) {
7699     if (adr.index_needs_rex()) {
7700       prefix(REX_XB);
7701     } else {
7702       prefix(REX_B);
7703     }
7704   } else {
7705     if (adr.index_needs_rex()) {
7706       prefix(REX_X);
7707     }
7708   }
7709 }
7710 
7711 void Assembler::prefixq(Address adr) {
7712   if (adr.base_needs_rex()) {
7713     if (adr.index_needs_rex()) {
7714       prefix(REX_WXB);
7715     } else {
7716       prefix(REX_WB);
7717     }
7718   } else {
7719     if (adr.index_needs_rex()) {
7720       prefix(REX_WX);
7721     } else {
7722       prefix(REX_W);
7723     }
7724   }
7725 }
7726 
7727 
7728 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
7729   if (reg->encoding() < 8) {
7730     if (adr.base_needs_rex()) {
7731       if (adr.index_needs_rex()) {
7732         prefix(REX_XB);
7733       } else {
7734         prefix(REX_B);
7735       }
7736     } else {
7737       if (adr.index_needs_rex()) {
7738         prefix(REX_X);
7739       } else if (byteinst && reg->encoding() >= 4 ) {
7740         prefix(REX);
7741       }
7742     }
7743   } else {
7744     if (adr.base_needs_rex()) {
7745       if (adr.index_needs_rex()) {
7746         prefix(REX_RXB);
7747       } else {
7748         prefix(REX_RB);
7749       }
7750     } else {
7751       if (adr.index_needs_rex()) {
7752         prefix(REX_RX);
7753       } else {
7754         prefix(REX_R);
7755       }
7756     }
7757   }
7758 }
7759 
7760 void Assembler::prefixq(Address adr, Register src) {
7761   if (src->encoding() < 8) {
7762     if (adr.base_needs_rex()) {
7763       if (adr.index_needs_rex()) {
7764         prefix(REX_WXB);
7765       } else {
7766         prefix(REX_WB);
7767       }
7768     } else {
7769       if (adr.index_needs_rex()) {
7770         prefix(REX_WX);
7771       } else {
7772         prefix(REX_W);
7773       }
7774     }
7775   } else {
7776     if (adr.base_needs_rex()) {
7777       if (adr.index_needs_rex()) {
7778         prefix(REX_WRXB);
7779       } else {
7780         prefix(REX_WRB);
7781       }
7782     } else {
7783       if (adr.index_needs_rex()) {
7784         prefix(REX_WRX);
7785       } else {
7786         prefix(REX_WR);
7787       }
7788     }
7789   }
7790 }
7791 
7792 void Assembler::prefix(Address adr, XMMRegister reg) {
7793   if (reg->encoding() < 8) {
7794     if (adr.base_needs_rex()) {
7795       if (adr.index_needs_rex()) {
7796         prefix(REX_XB);
7797       } else {
7798         prefix(REX_B);
7799       }
7800     } else {
7801       if (adr.index_needs_rex()) {
7802         prefix(REX_X);
7803       }
7804     }
7805   } else {
7806     if (adr.base_needs_rex()) {
7807       if (adr.index_needs_rex()) {
7808         prefix(REX_RXB);
7809       } else {
7810         prefix(REX_RB);
7811       }
7812     } else {
7813       if (adr.index_needs_rex()) {
7814         prefix(REX_RX);
7815       } else {
7816         prefix(REX_R);
7817       }
7818     }
7819   }
7820 }
7821 
7822 void Assembler::prefixq(Address adr, XMMRegister src) {
7823   if (src->encoding() < 8) {
7824     if (adr.base_needs_rex()) {
7825       if (adr.index_needs_rex()) {
7826         prefix(REX_WXB);
7827       } else {
7828         prefix(REX_WB);
7829       }
7830     } else {
7831       if (adr.index_needs_rex()) {
7832         prefix(REX_WX);
7833       } else {
7834         prefix(REX_W);
7835       }
7836     }
7837   } else {
7838     if (adr.base_needs_rex()) {
7839       if (adr.index_needs_rex()) {
7840         prefix(REX_WRXB);
7841       } else {
7842         prefix(REX_WRB);
7843       }
7844     } else {
7845       if (adr.index_needs_rex()) {
7846         prefix(REX_WRX);
7847       } else {
7848         prefix(REX_WR);
7849       }
7850     }
7851   }
7852 }
7853 
7854 void Assembler::adcq(Register dst, int32_t imm32) {
7855   (void) prefixq_and_encode(dst->encoding());
7856   emit_arith(0x81, 0xD0, dst, imm32);
7857 }
7858 
7859 void Assembler::adcq(Register dst, Address src) {
7860   InstructionMark im(this);
7861   prefixq(src, dst);
7862   emit_int8(0x13);
7863   emit_operand(dst, src);
7864 }
7865 
7866 void Assembler::adcq(Register dst, Register src) {
7867   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7868   emit_arith(0x13, 0xC0, dst, src);
7869 }
7870 
7871 void Assembler::addq(Address dst, int32_t imm32) {
7872   InstructionMark im(this);
7873   prefixq(dst);
7874   emit_arith_operand(0x81, rax, dst,imm32);
7875 }
7876 
7877 void Assembler::addq(Address dst, Register src) {
7878   InstructionMark im(this);
7879   prefixq(dst, src);
7880   emit_int8(0x01);
7881   emit_operand(src, dst);
7882 }
7883 
7884 void Assembler::addq(Register dst, int32_t imm32) {
7885   (void) prefixq_and_encode(dst->encoding());
7886   emit_arith(0x81, 0xC0, dst, imm32);
7887 }
7888 
7889 void Assembler::addq(Register dst, Address src) {
7890   InstructionMark im(this);
7891   prefixq(src, dst);
7892   emit_int8(0x03);
7893   emit_operand(dst, src);
7894 }
7895 
7896 void Assembler::addq(Register dst, Register src) {
7897   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7898   emit_arith(0x03, 0xC0, dst, src);
7899 }
7900 
7901 void Assembler::adcxq(Register dst, Register src) {
7902   //assert(VM_Version::supports_adx(), "adx instructions not supported");
7903   emit_int8((unsigned char)0x66);
7904   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7905   emit_int8(0x0F);
7906   emit_int8(0x38);
7907   emit_int8((unsigned char)0xF6);
7908   emit_int8((unsigned char)(0xC0 | encode));
7909 }
7910 
7911 void Assembler::adoxq(Register dst, Register src) {
7912   //assert(VM_Version::supports_adx(), "adx instructions not supported");
7913   emit_int8((unsigned char)0xF3);
7914   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7915   emit_int8(0x0F);
7916   emit_int8(0x38);
7917   emit_int8((unsigned char)0xF6);
7918   emit_int8((unsigned char)(0xC0 | encode));
7919 }
7920 
7921 void Assembler::andq(Address dst, int32_t imm32) {
7922   InstructionMark im(this);
7923   prefixq(dst);
7924   emit_int8((unsigned char)0x81);
7925   emit_operand(rsp, dst, 4);
7926   emit_int32(imm32);
7927 }
7928 
7929 void Assembler::andq(Register dst, int32_t imm32) {
7930   (void) prefixq_and_encode(dst->encoding());
7931   emit_arith(0x81, 0xE0, dst, imm32);
7932 }
7933 
7934 void Assembler::andq(Register dst, Address src) {
7935   InstructionMark im(this);
7936   prefixq(src, dst);
7937   emit_int8(0x23);
7938   emit_operand(dst, src);
7939 }
7940 
7941 void Assembler::andq(Register dst, Register src) {
7942   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7943   emit_arith(0x23, 0xC0, dst, src);
7944 }
7945 
7946 void Assembler::andnq(Register dst, Register src1, Register src2) {
7947   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7948   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7949   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7950   emit_int8((unsigned char)0xF2);
7951   emit_int8((unsigned char)(0xC0 | encode));
7952 }
7953 
7954 void Assembler::andnq(Register dst, Register src1, Address src2) {
7955   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7956   InstructionMark im(this);
7957   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7958   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7959   emit_int8((unsigned char)0xF2);
7960   emit_operand(dst, src2);
7961 }
7962 
7963 void Assembler::bsfq(Register dst, Register src) {
7964   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7965   emit_int8(0x0F);
7966   emit_int8((unsigned char)0xBC);
7967   emit_int8((unsigned char)(0xC0 | encode));
7968 }
7969 
7970 void Assembler::bsrq(Register dst, Register src) {
7971   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7972   emit_int8(0x0F);
7973   emit_int8((unsigned char)0xBD);
7974   emit_int8((unsigned char)(0xC0 | encode));
7975 }
7976 
7977 void Assembler::bswapq(Register reg) {
7978   int encode = prefixq_and_encode(reg->encoding());
7979   emit_int8(0x0F);
7980   emit_int8((unsigned char)(0xC8 | encode));
7981 }
7982 
7983 void Assembler::blsiq(Register dst, Register src) {
7984   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7985   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7986   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7987   emit_int8((unsigned char)0xF3);
7988   emit_int8((unsigned char)(0xC0 | encode));
7989 }
7990 
7991 void Assembler::blsiq(Register dst, Address src) {
7992   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7993   InstructionMark im(this);
7994   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7995   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
7996   emit_int8((unsigned char)0xF3);
7997   emit_operand(rbx, src);
7998 }
7999 
8000 void Assembler::blsmskq(Register dst, Register src) {
8001   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8002   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8003   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8004   emit_int8((unsigned char)0xF3);
8005   emit_int8((unsigned char)(0xC0 | encode));
8006 }
8007 
8008 void Assembler::blsmskq(Register dst, Address src) {
8009   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8010   InstructionMark im(this);
8011   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8012   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8013   emit_int8((unsigned char)0xF3);
8014   emit_operand(rdx, src);
8015 }
8016 
8017 void Assembler::blsrq(Register dst, Register src) {
8018   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8019   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8020   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8021   emit_int8((unsigned char)0xF3);
8022   emit_int8((unsigned char)(0xC0 | encode));
8023 }
8024 
8025 void Assembler::blsrq(Register dst, Address src) {
8026   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8027   InstructionMark im(this);
8028   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8029   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8030   emit_int8((unsigned char)0xF3);
8031   emit_operand(rcx, src);
8032 }
8033 
8034 void Assembler::cdqq() {
8035   prefix(REX_W);
8036   emit_int8((unsigned char)0x99);
8037 }
8038 
8039 void Assembler::clflush(Address adr) {
8040   prefix(adr);
8041   emit_int8(0x0F);
8042   emit_int8((unsigned char)0xAE);
8043   emit_operand(rdi, adr);
8044 }
8045 
8046 void Assembler::cmovq(Condition cc, Register dst, Register src) {
8047   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8048   emit_int8(0x0F);
8049   emit_int8(0x40 | cc);
8050   emit_int8((unsigned char)(0xC0 | encode));
8051 }
8052 
8053 void Assembler::cmovq(Condition cc, Register dst, Address src) {
8054   InstructionMark im(this);
8055   prefixq(src, dst);
8056   emit_int8(0x0F);
8057   emit_int8(0x40 | cc);
8058   emit_operand(dst, src);
8059 }
8060 
8061 void Assembler::cmpq(Address dst, int32_t imm32) {
8062   InstructionMark im(this);
8063   prefixq(dst);
8064   emit_int8((unsigned char)0x81);
8065   emit_operand(rdi, dst, 4);
8066   emit_int32(imm32);
8067 }
8068 
8069 void Assembler::cmpq(Register dst, int32_t imm32) {
8070   (void) prefixq_and_encode(dst->encoding());
8071   emit_arith(0x81, 0xF8, dst, imm32);
8072 }
8073 
8074 void Assembler::cmpq(Address dst, Register src) {
8075   InstructionMark im(this);
8076   prefixq(dst, src);
8077   emit_int8(0x3B);
8078   emit_operand(src, dst);
8079 }
8080 
8081 void Assembler::cmpq(Register dst, Register src) {
8082   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8083   emit_arith(0x3B, 0xC0, dst, src);
8084 }
8085 
8086 void Assembler::cmpq(Register dst, Address  src) {
8087   InstructionMark im(this);
8088   prefixq(src, dst);
8089   emit_int8(0x3B);
8090   emit_operand(dst, src);
8091 }
8092 
8093 void Assembler::cmpxchgq(Register reg, Address adr) {
8094   InstructionMark im(this);
8095   prefixq(adr, reg);
8096   emit_int8(0x0F);
8097   emit_int8((unsigned char)0xB1);
8098   emit_operand(reg, adr);
8099 }
8100 
8101 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
8102   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8103   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8104   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8105   emit_int8(0x2A);
8106   emit_int8((unsigned char)(0xC0 | encode));
8107 }
8108 
8109 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
8110   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8111   InstructionMark im(this);
8112   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8113   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8114   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8115   emit_int8(0x2A);
8116   emit_operand(dst, src);
8117 }
8118 
8119 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
8120   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8121   InstructionMark im(this);
8122   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8123   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8124   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8125   emit_int8(0x2A);
8126   emit_operand(dst, src);
8127 }
8128 
8129 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
8130   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8131   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8132   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8133   emit_int8(0x2C);
8134   emit_int8((unsigned char)(0xC0 | encode));
8135 }
8136 
8137 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
8138   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8139   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8140   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8141   emit_int8(0x2C);
8142   emit_int8((unsigned char)(0xC0 | encode));
8143 }
8144 
8145 void Assembler::decl(Register dst) {
8146   // Don't use it directly. Use MacroAssembler::decrementl() instead.
8147   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8148   int encode = prefix_and_encode(dst->encoding());
8149   emit_int8((unsigned char)0xFF);
8150   emit_int8((unsigned char)(0xC8 | encode));
8151 }
8152 
8153 void Assembler::decq(Register dst) {
8154   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8155   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8156   int encode = prefixq_and_encode(dst->encoding());
8157   emit_int8((unsigned char)0xFF);
8158   emit_int8(0xC8 | encode);
8159 }
8160 
8161 void Assembler::decq(Address dst) {
8162   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8163   InstructionMark im(this);
8164   prefixq(dst);
8165   emit_int8((unsigned char)0xFF);
8166   emit_operand(rcx, dst);
8167 }
8168 
8169 void Assembler::fxrstor(Address src) {
8170   prefixq(src);
8171   emit_int8(0x0F);
8172   emit_int8((unsigned char)0xAE);
8173   emit_operand(as_Register(1), src);
8174 }
8175 
8176 void Assembler::xrstor(Address src) {
8177   prefixq(src);
8178   emit_int8(0x0F);
8179   emit_int8((unsigned char)0xAE);
8180   emit_operand(as_Register(5), src);
8181 }
8182 
8183 void Assembler::fxsave(Address dst) {
8184   prefixq(dst);
8185   emit_int8(0x0F);
8186   emit_int8((unsigned char)0xAE);
8187   emit_operand(as_Register(0), dst);
8188 }
8189 
8190 void Assembler::xsave(Address dst) {
8191   prefixq(dst);
8192   emit_int8(0x0F);
8193   emit_int8((unsigned char)0xAE);
8194   emit_operand(as_Register(4), dst);
8195 }
8196 
8197 void Assembler::idivq(Register src) {
8198   int encode = prefixq_and_encode(src->encoding());
8199   emit_int8((unsigned char)0xF7);
8200   emit_int8((unsigned char)(0xF8 | encode));
8201 }
8202 
8203 void Assembler::imulq(Register dst, Register src) {
8204   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8205   emit_int8(0x0F);
8206   emit_int8((unsigned char)0xAF);
8207   emit_int8((unsigned char)(0xC0 | encode));
8208 }
8209 
8210 void Assembler::imulq(Register dst, Register src, int value) {
8211   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8212   if (is8bit(value)) {
8213     emit_int8(0x6B);
8214     emit_int8((unsigned char)(0xC0 | encode));
8215     emit_int8(value & 0xFF);
8216   } else {
8217     emit_int8(0x69);
8218     emit_int8((unsigned char)(0xC0 | encode));
8219     emit_int32(value);
8220   }
8221 }
8222 
8223 void Assembler::imulq(Register dst, Address src) {
8224   InstructionMark im(this);
8225   prefixq(src, dst);
8226   emit_int8(0x0F);
8227   emit_int8((unsigned char) 0xAF);
8228   emit_operand(dst, src);
8229 }
8230 
8231 void Assembler::incl(Register dst) {
8232   // Don't use it directly. Use MacroAssembler::incrementl() instead.
8233   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8234   int encode = prefix_and_encode(dst->encoding());
8235   emit_int8((unsigned char)0xFF);
8236   emit_int8((unsigned char)(0xC0 | encode));
8237 }
8238 
8239 void Assembler::incq(Register dst) {
8240   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8241   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8242   int encode = prefixq_and_encode(dst->encoding());
8243   emit_int8((unsigned char)0xFF);
8244   emit_int8((unsigned char)(0xC0 | encode));
8245 }
8246 
8247 void Assembler::incq(Address dst) {
8248   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8249   InstructionMark im(this);
8250   prefixq(dst);
8251   emit_int8((unsigned char)0xFF);
8252   emit_operand(rax, dst);
8253 }
8254 
8255 void Assembler::lea(Register dst, Address src) {
8256   leaq(dst, src);
8257 }
8258 
8259 void Assembler::leaq(Register dst, Address src) {
8260   InstructionMark im(this);
8261   prefixq(src, dst);
8262   emit_int8((unsigned char)0x8D);
8263   emit_operand(dst, src);
8264 }
8265 
8266 void Assembler::mov64(Register dst, int64_t imm64) {
8267   InstructionMark im(this);
8268   int encode = prefixq_and_encode(dst->encoding());
8269   emit_int8((unsigned char)(0xB8 | encode));
8270   emit_int64(imm64);
8271 }
8272 
8273 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
8274   InstructionMark im(this);
8275   int encode = prefixq_and_encode(dst->encoding());
8276   emit_int8(0xB8 | encode);
8277   emit_data64(imm64, rspec);
8278 }
8279 
8280 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
8281   InstructionMark im(this);
8282   int encode = prefix_and_encode(dst->encoding());
8283   emit_int8((unsigned char)(0xB8 | encode));
8284   emit_data((int)imm32, rspec, narrow_oop_operand);
8285 }
8286 
8287 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
8288   InstructionMark im(this);
8289   prefix(dst);
8290   emit_int8((unsigned char)0xC7);
8291   emit_operand(rax, dst, 4);
8292   emit_data((int)imm32, rspec, narrow_oop_operand);
8293 }
8294 
8295 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
8296   InstructionMark im(this);
8297   int encode = prefix_and_encode(src1->encoding());
8298   emit_int8((unsigned char)0x81);
8299   emit_int8((unsigned char)(0xF8 | encode));
8300   emit_data((int)imm32, rspec, narrow_oop_operand);
8301 }
8302 
8303 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
8304   InstructionMark im(this);
8305   prefix(src1);
8306   emit_int8((unsigned char)0x81);
8307   emit_operand(rax, src1, 4);
8308   emit_data((int)imm32, rspec, narrow_oop_operand);
8309 }
8310 
8311 void Assembler::lzcntq(Register dst, Register src) {
8312   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
8313   emit_int8((unsigned char)0xF3);
8314   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8315   emit_int8(0x0F);
8316   emit_int8((unsigned char)0xBD);
8317   emit_int8((unsigned char)(0xC0 | encode));
8318 }
8319 
8320 void Assembler::movdq(XMMRegister dst, Register src) {
8321   // table D-1 says MMX/SSE2
8322   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8323   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8324   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8325   emit_int8(0x6E);
8326   emit_int8((unsigned char)(0xC0 | encode));
8327 }
8328 
8329 void Assembler::movdq(Register dst, XMMRegister src) {
8330   // table D-1 says MMX/SSE2
8331   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8332   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8333   // swap src/dst to get correct prefix
8334   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8335   emit_int8(0x7E);
8336   emit_int8((unsigned char)(0xC0 | encode));
8337 }
8338 
8339 void Assembler::movq(Register dst, Register src) {
8340   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8341   emit_int8((unsigned char)0x8B);
8342   emit_int8((unsigned char)(0xC0 | encode));
8343 }
8344 
8345 void Assembler::movq(Register dst, Address src) {
8346   InstructionMark im(this);
8347   prefixq(src, dst);
8348   emit_int8((unsigned char)0x8B);
8349   emit_operand(dst, src);
8350 }
8351 
8352 void Assembler::movq(Address dst, Register src) {
8353   InstructionMark im(this);
8354   prefixq(dst, src);
8355   emit_int8((unsigned char)0x89);
8356   emit_operand(src, dst);
8357 }
8358 
8359 void Assembler::movsbq(Register dst, Address src) {
8360   InstructionMark im(this);
8361   prefixq(src, dst);
8362   emit_int8(0x0F);
8363   emit_int8((unsigned char)0xBE);
8364   emit_operand(dst, src);
8365 }
8366 
8367 void Assembler::movsbq(Register dst, Register src) {
8368   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8369   emit_int8(0x0F);
8370   emit_int8((unsigned char)0xBE);
8371   emit_int8((unsigned char)(0xC0 | encode));
8372 }
8373 
8374 void Assembler::movslq(Register dst, int32_t imm32) {
8375   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
8376   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
8377   // as a result we shouldn't use until tested at runtime...
8378   ShouldNotReachHere();
8379   InstructionMark im(this);
8380   int encode = prefixq_and_encode(dst->encoding());
8381   emit_int8((unsigned char)(0xC7 | encode));
8382   emit_int32(imm32);
8383 }
8384 
8385 void Assembler::movslq(Address dst, int32_t imm32) {
8386   assert(is_simm32(imm32), "lost bits");
8387   InstructionMark im(this);
8388   prefixq(dst);
8389   emit_int8((unsigned char)0xC7);
8390   emit_operand(rax, dst, 4);
8391   emit_int32(imm32);
8392 }
8393 
8394 void Assembler::movslq(Register dst, Address src) {
8395   InstructionMark im(this);
8396   prefixq(src, dst);
8397   emit_int8(0x63);
8398   emit_operand(dst, src);
8399 }
8400 
8401 void Assembler::movslq(Register dst, Register src) {
8402   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8403   emit_int8(0x63);
8404   emit_int8((unsigned char)(0xC0 | encode));
8405 }
8406 
8407 void Assembler::movswq(Register dst, Address src) {
8408   InstructionMark im(this);
8409   prefixq(src, dst);
8410   emit_int8(0x0F);
8411   emit_int8((unsigned char)0xBF);
8412   emit_operand(dst, src);
8413 }
8414 
8415 void Assembler::movswq(Register dst, Register src) {
8416   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8417   emit_int8((unsigned char)0x0F);
8418   emit_int8((unsigned char)0xBF);
8419   emit_int8((unsigned char)(0xC0 | encode));
8420 }
8421 
8422 void Assembler::movzbq(Register dst, Address src) {
8423   InstructionMark im(this);
8424   prefixq(src, dst);
8425   emit_int8((unsigned char)0x0F);
8426   emit_int8((unsigned char)0xB6);
8427   emit_operand(dst, src);
8428 }
8429 
8430 void Assembler::movzbq(Register dst, Register src) {
8431   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8432   emit_int8(0x0F);
8433   emit_int8((unsigned char)0xB6);
8434   emit_int8(0xC0 | encode);
8435 }
8436 
8437 void Assembler::movzwq(Register dst, Address src) {
8438   InstructionMark im(this);
8439   prefixq(src, dst);
8440   emit_int8((unsigned char)0x0F);
8441   emit_int8((unsigned char)0xB7);
8442   emit_operand(dst, src);
8443 }
8444 
8445 void Assembler::movzwq(Register dst, Register src) {
8446   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8447   emit_int8((unsigned char)0x0F);
8448   emit_int8((unsigned char)0xB7);
8449   emit_int8((unsigned char)(0xC0 | encode));
8450 }
8451 
8452 void Assembler::mulq(Address src) {
8453   InstructionMark im(this);
8454   prefixq(src);
8455   emit_int8((unsigned char)0xF7);
8456   emit_operand(rsp, src);
8457 }
8458 
8459 void Assembler::mulq(Register src) {
8460   int encode = prefixq_and_encode(src->encoding());
8461   emit_int8((unsigned char)0xF7);
8462   emit_int8((unsigned char)(0xE0 | encode));
8463 }
8464 
8465 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
8466   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8467   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8468   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
8469   emit_int8((unsigned char)0xF6);
8470   emit_int8((unsigned char)(0xC0 | encode));
8471 }
8472 
8473 void Assembler::negq(Register dst) {
8474   int encode = prefixq_and_encode(dst->encoding());
8475   emit_int8((unsigned char)0xF7);
8476   emit_int8((unsigned char)(0xD8 | encode));
8477 }
8478 
8479 void Assembler::notq(Register dst) {
8480   int encode = prefixq_and_encode(dst->encoding());
8481   emit_int8((unsigned char)0xF7);
8482   emit_int8((unsigned char)(0xD0 | encode));
8483 }
8484 
8485 void Assembler::orq(Address dst, int32_t imm32) {
8486   InstructionMark im(this);
8487   prefixq(dst);
8488   emit_int8((unsigned char)0x81);
8489   emit_operand(rcx, dst, 4);
8490   emit_int32(imm32);
8491 }
8492 
8493 void Assembler::orq(Register dst, int32_t imm32) {
8494   (void) prefixq_and_encode(dst->encoding());
8495   emit_arith(0x81, 0xC8, dst, imm32);
8496 }
8497 
8498 void Assembler::orq(Register dst, Address src) {
8499   InstructionMark im(this);
8500   prefixq(src, dst);
8501   emit_int8(0x0B);
8502   emit_operand(dst, src);
8503 }
8504 
8505 void Assembler::orq(Register dst, Register src) {
8506   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8507   emit_arith(0x0B, 0xC0, dst, src);
8508 }
8509 
8510 void Assembler::popa() { // 64bit
8511   movq(r15, Address(rsp, 0));
8512   movq(r14, Address(rsp, wordSize));
8513   movq(r13, Address(rsp, 2 * wordSize));
8514   movq(r12, Address(rsp, 3 * wordSize));
8515   movq(r11, Address(rsp, 4 * wordSize));
8516   movq(r10, Address(rsp, 5 * wordSize));
8517   movq(r9,  Address(rsp, 6 * wordSize));
8518   movq(r8,  Address(rsp, 7 * wordSize));
8519   movq(rdi, Address(rsp, 8 * wordSize));
8520   movq(rsi, Address(rsp, 9 * wordSize));
8521   movq(rbp, Address(rsp, 10 * wordSize));
8522   // skip rsp
8523   movq(rbx, Address(rsp, 12 * wordSize));
8524   movq(rdx, Address(rsp, 13 * wordSize));
8525   movq(rcx, Address(rsp, 14 * wordSize));
8526   movq(rax, Address(rsp, 15 * wordSize));
8527 
8528   addq(rsp, 16 * wordSize);
8529 }
8530 
8531 void Assembler::popcntq(Register dst, Address src) {
8532   assert(VM_Version::supports_popcnt(), "must support");
8533   InstructionMark im(this);
8534   emit_int8((unsigned char)0xF3);
8535   prefixq(src, dst);
8536   emit_int8((unsigned char)0x0F);
8537   emit_int8((unsigned char)0xB8);
8538   emit_operand(dst, src);
8539 }
8540 
8541 void Assembler::popcntq(Register dst, Register src) {
8542   assert(VM_Version::supports_popcnt(), "must support");
8543   emit_int8((unsigned char)0xF3);
8544   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8545   emit_int8((unsigned char)0x0F);
8546   emit_int8((unsigned char)0xB8);
8547   emit_int8((unsigned char)(0xC0 | encode));
8548 }
8549 
8550 void Assembler::popq(Address dst) {
8551   InstructionMark im(this);
8552   prefixq(dst);
8553   emit_int8((unsigned char)0x8F);
8554   emit_operand(rax, dst);
8555 }
8556 
8557 void Assembler::pusha() { // 64bit
8558   // we have to store original rsp.  ABI says that 128 bytes
8559   // below rsp are local scratch.
8560   movq(Address(rsp, -5 * wordSize), rsp);
8561 
8562   subq(rsp, 16 * wordSize);
8563 
8564   movq(Address(rsp, 15 * wordSize), rax);
8565   movq(Address(rsp, 14 * wordSize), rcx);
8566   movq(Address(rsp, 13 * wordSize), rdx);
8567   movq(Address(rsp, 12 * wordSize), rbx);
8568   // skip rsp
8569   movq(Address(rsp, 10 * wordSize), rbp);
8570   movq(Address(rsp, 9 * wordSize), rsi);
8571   movq(Address(rsp, 8 * wordSize), rdi);
8572   movq(Address(rsp, 7 * wordSize), r8);
8573   movq(Address(rsp, 6 * wordSize), r9);
8574   movq(Address(rsp, 5 * wordSize), r10);
8575   movq(Address(rsp, 4 * wordSize), r11);
8576   movq(Address(rsp, 3 * wordSize), r12);
8577   movq(Address(rsp, 2 * wordSize), r13);
8578   movq(Address(rsp, wordSize), r14);
8579   movq(Address(rsp, 0), r15);
8580 }
8581 
8582 void Assembler::pushq(Address src) {
8583   InstructionMark im(this);
8584   prefixq(src);
8585   emit_int8((unsigned char)0xFF);
8586   emit_operand(rsi, src);
8587 }
8588 
8589 void Assembler::rclq(Register dst, int imm8) {
8590   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8591   int encode = prefixq_and_encode(dst->encoding());
8592   if (imm8 == 1) {
8593     emit_int8((unsigned char)0xD1);
8594     emit_int8((unsigned char)(0xD0 | encode));
8595   } else {
8596     emit_int8((unsigned char)0xC1);
8597     emit_int8((unsigned char)(0xD0 | encode));
8598     emit_int8(imm8);
8599   }
8600 }
8601 
8602 void Assembler::rcrq(Register dst, int imm8) {
8603   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8604   int encode = prefixq_and_encode(dst->encoding());
8605   if (imm8 == 1) {
8606     emit_int8((unsigned char)0xD1);
8607     emit_int8((unsigned char)(0xD8 | encode));
8608   } else {
8609     emit_int8((unsigned char)0xC1);
8610     emit_int8((unsigned char)(0xD8 | encode));
8611     emit_int8(imm8);
8612   }
8613 }
8614 
8615 void Assembler::rorq(Register dst, int imm8) {
8616   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8617   int encode = prefixq_and_encode(dst->encoding());
8618   if (imm8 == 1) {
8619     emit_int8((unsigned char)0xD1);
8620     emit_int8((unsigned char)(0xC8 | encode));
8621   } else {
8622     emit_int8((unsigned char)0xC1);
8623     emit_int8((unsigned char)(0xc8 | encode));
8624     emit_int8(imm8);
8625   }
8626 }
8627 
8628 void Assembler::rorxq(Register dst, Register src, int imm8) {
8629   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8630   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8631   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8632   emit_int8((unsigned char)0xF0);
8633   emit_int8((unsigned char)(0xC0 | encode));
8634   emit_int8(imm8);
8635 }
8636 
8637 void Assembler::rorxd(Register dst, Register src, int imm8) {
8638   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8639   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
8640   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8641   emit_int8((unsigned char)0xF0);
8642   emit_int8((unsigned char)(0xC0 | encode));
8643   emit_int8(imm8);
8644 }
8645 
8646 void Assembler::sarq(Register dst, int imm8) {
8647   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8648   int encode = prefixq_and_encode(dst->encoding());
8649   if (imm8 == 1) {
8650     emit_int8((unsigned char)0xD1);
8651     emit_int8((unsigned char)(0xF8 | encode));
8652   } else {
8653     emit_int8((unsigned char)0xC1);
8654     emit_int8((unsigned char)(0xF8 | encode));
8655     emit_int8(imm8);
8656   }
8657 }
8658 
8659 void Assembler::sarq(Register dst) {
8660   int encode = prefixq_and_encode(dst->encoding());
8661   emit_int8((unsigned char)0xD3);
8662   emit_int8((unsigned char)(0xF8 | encode));
8663 }
8664 
8665 void Assembler::sbbq(Address dst, int32_t imm32) {
8666   InstructionMark im(this);
8667   prefixq(dst);
8668   emit_arith_operand(0x81, rbx, dst, imm32);
8669 }
8670 
8671 void Assembler::sbbq(Register dst, int32_t imm32) {
8672   (void) prefixq_and_encode(dst->encoding());
8673   emit_arith(0x81, 0xD8, dst, imm32);
8674 }
8675 
8676 void Assembler::sbbq(Register dst, Address src) {
8677   InstructionMark im(this);
8678   prefixq(src, dst);
8679   emit_int8(0x1B);
8680   emit_operand(dst, src);
8681 }
8682 
8683 void Assembler::sbbq(Register dst, Register src) {
8684   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8685   emit_arith(0x1B, 0xC0, dst, src);
8686 }
8687 
8688 void Assembler::shlq(Register dst, int imm8) {
8689   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8690   int encode = prefixq_and_encode(dst->encoding());
8691   if (imm8 == 1) {
8692     emit_int8((unsigned char)0xD1);
8693     emit_int8((unsigned char)(0xE0 | encode));
8694   } else {
8695     emit_int8((unsigned char)0xC1);
8696     emit_int8((unsigned char)(0xE0 | encode));
8697     emit_int8(imm8);
8698   }
8699 }
8700 
8701 void Assembler::shlq(Register dst) {
8702   int encode = prefixq_and_encode(dst->encoding());
8703   emit_int8((unsigned char)0xD3);
8704   emit_int8((unsigned char)(0xE0 | encode));
8705 }
8706 
8707 void Assembler::shrq(Register dst, int imm8) {
8708   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8709   int encode = prefixq_and_encode(dst->encoding());
8710   emit_int8((unsigned char)0xC1);
8711   emit_int8((unsigned char)(0xE8 | encode));
8712   emit_int8(imm8);
8713 }
8714 
8715 void Assembler::shrq(Register dst) {
8716   int encode = prefixq_and_encode(dst->encoding());
8717   emit_int8((unsigned char)0xD3);
8718   emit_int8(0xE8 | encode);
8719 }
8720 
8721 void Assembler::subq(Address dst, int32_t imm32) {
8722   InstructionMark im(this);
8723   prefixq(dst);
8724   emit_arith_operand(0x81, rbp, dst, imm32);
8725 }
8726 
8727 void Assembler::subq(Address dst, Register src) {
8728   InstructionMark im(this);
8729   prefixq(dst, src);
8730   emit_int8(0x29);
8731   emit_operand(src, dst);
8732 }
8733 
8734 void Assembler::subq(Register dst, int32_t imm32) {
8735   (void) prefixq_and_encode(dst->encoding());
8736   emit_arith(0x81, 0xE8, dst, imm32);
8737 }
8738 
8739 // Force generation of a 4 byte immediate value even if it fits into 8bit
8740 void Assembler::subq_imm32(Register dst, int32_t imm32) {
8741   (void) prefixq_and_encode(dst->encoding());
8742   emit_arith_imm32(0x81, 0xE8, dst, imm32);
8743 }
8744 
8745 void Assembler::subq(Register dst, Address src) {
8746   InstructionMark im(this);
8747   prefixq(src, dst);
8748   emit_int8(0x2B);
8749   emit_operand(dst, src);
8750 }
8751 
8752 void Assembler::subq(Register dst, Register src) {
8753   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8754   emit_arith(0x2B, 0xC0, dst, src);
8755 }
8756 
8757 void Assembler::testq(Register dst, int32_t imm32) {
8758   // not using emit_arith because test
8759   // doesn't support sign-extension of
8760   // 8bit operands
8761   int encode = dst->encoding();
8762   if (encode == 0) {
8763     prefix(REX_W);
8764     emit_int8((unsigned char)0xA9);
8765   } else {
8766     encode = prefixq_and_encode(encode);
8767     emit_int8((unsigned char)0xF7);
8768     emit_int8((unsigned char)(0xC0 | encode));
8769   }
8770   emit_int32(imm32);
8771 }
8772 
8773 void Assembler::testq(Register dst, Register src) {
8774   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8775   emit_arith(0x85, 0xC0, dst, src);
8776 }
8777 
8778 void Assembler::xaddq(Address dst, Register src) {
8779   InstructionMark im(this);
8780   prefixq(dst, src);
8781   emit_int8(0x0F);
8782   emit_int8((unsigned char)0xC1);
8783   emit_operand(src, dst);
8784 }
8785 
8786 void Assembler::xchgq(Register dst, Address src) {
8787   InstructionMark im(this);
8788   prefixq(src, dst);
8789   emit_int8((unsigned char)0x87);
8790   emit_operand(dst, src);
8791 }
8792 
8793 void Assembler::xchgq(Register dst, Register src) {
8794   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8795   emit_int8((unsigned char)0x87);
8796   emit_int8((unsigned char)(0xc0 | encode));
8797 }
8798 
8799 void Assembler::xorq(Register dst, Register src) {
8800   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8801   emit_arith(0x33, 0xC0, dst, src);
8802 }
8803 
8804 void Assembler::xorq(Register dst, Address src) {
8805   InstructionMark im(this);
8806   prefixq(src, dst);
8807   emit_int8(0x33);
8808   emit_operand(dst, src);
8809 }
8810 
8811 #endif // !LP64